{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 相似文章推荐-Word2Vec+Tfidf+LSH"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 设置 python 和 spark路径"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import findspark\n",
    "findspark.init(spark_home='/usr/local/spark/',python_path='/home/master/LoadData/venv/bin/python')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "SLF4J: Class path contains multiple SLF4J bindings.\n",
      "SLF4J: Found binding in [jar:file:/usr/local/spark/jars/slf4j-log4j12-1.7.30.jar!/org/slf4j/impl/StaticLoggerBinder.class]\n",
      "SLF4J: Found binding in [jar:file:/usr/local/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]\n",
      "SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.\n",
      "SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]\n",
      "2023-08-04 09:11:28,603 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n",
      "Setting default log level to \"WARN\".\n",
      "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n"
     ]
    }
   ],
   "source": [
    "from pyspark.sql import SparkSession\n",
    "import happybase\n",
    "import jieba\n",
    "from txdpy import get_chinese\n",
    "\n",
    "spark = SparkSession.builder.appName('SparkHBaseRDD').master('local[*]').getOrCreate()\n",
    "sc=spark.sparkContext"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 获取推荐文章的时间范围\n",
    "import time\n",
    "import datetime\n",
    "\n",
    "def recommend_time():\n",
    "    start_time = datetime.datetime.now() + datetime.timedelta(days = -1)\n",
    "    end_time = datetime.datetime.now() + datetime.timedelta(days = -3)\n",
    "    return time.mktime(time.strptime(str(start_time).split('.')[0], \"%Y-%m-%d %H:%M:%S\")), time.mktime(time.strptime(str(end_time).split('.')[0], \"%Y-%m-%d %H:%M:%S\"))\n",
    "\n",
    "# 判断当前时间戳是否在当天\n",
    "def now_time(timeStamp):\n",
    "    timeArray = time.localtime(timeStamp)\n",
    "    therStyleTime = time.strftime(\"%Y-%m-%d %H:%M:%S\", timeArray)\n",
    "    date_list  = therStyleTime.split(\" \")[0].split(\"-\")\n",
    "    t_year = int(date_list[0])\n",
    "    t_month = int(date_list[1])\n",
    "    t_day = int(date_list[2])\n",
    "\n",
    "    c_year = datetime.datetime.now().year\n",
    "    c_month = datetime.datetime.now().month\n",
    "    c_day = datetime.datetime.now().day\n",
    "\n",
    "    final = False\n",
    "    if c_year == t_year and c_month == t_month and c_day == t_day:\n",
    "        final = True\n",
    "\n",
    "    return final\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "connection=happybase.Connection('master')\n",
    "table=connection.table('news')\n",
    "g=table.scan()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 导入hbase数据并进行分词"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1690879533.0\n",
      "1691052333.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Building prefix dict from the default dictionary ...\n",
      "Loading model from cache /tmp/jieba.cache\n",
      "Loading model cost 0.878 seconds.\n",
      "Prefix dict has been built successfully.\n"
     ]
    }
   ],
   "source": [
    "bag_list = []\n",
    "s_list = []\n",
    "e_list = []\n",
    "\n",
    "end_time, start_time = recommend_time()\n",
    "\n",
    "print(start_time)\n",
    "print(end_time)\n",
    "for k, d in g:\n",
    "    if  start_time < int(str(d[b\"info:time\"], 'utf-8')) < end_time:\n",
    "        new_d = {}\n",
    "        new_d[\"id\"] = str(k, 'utf-8')\n",
    "        new_d[\"time\"] = str(d[b\"info:time\"], 'utf-8')\n",
    "        new_title = [i for i in get_chinese(str(d[b\"info:title\"], 'utf-8'))]\n",
    "        new_d[\"title\"] = [i for i in jieba.cut(str(''.join(new_title)), cut_all=True)]\n",
    "        new_d[\"ifE\"] = True\n",
    "        e_list.append(new_d)\n",
    "\n",
    "    if now_time(int(str(d[b\"info:time\"], 'utf-8'))):\n",
    "        new_d = {}\n",
    "        new_d[\"id\"] = str(k, 'utf-8')\n",
    "        new_d[\"time\"] = str(d[b\"info:time\"], 'utf-8')\n",
    "        new_title = [i for i in get_chinese(str(d[b\"info:title\"], 'utf-8'))]\n",
    "        new_d[\"title\"] = [i for i in jieba.cut(str(''.join(new_title)), cut_all=True)]\n",
    "        new_d[\"ifE\"] = False\n",
    "        s_list.append(new_d)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DataFrame[id: string, ifE: boolean, time: string, title: array<string>]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Stage 0:>                                                          (0 + 1) / 1]\r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+-----+----------+------------------------------+\n",
      "|                 id|  ifE|      time|                         title|\n",
      "+-------------------+-----+----------+------------------------------+\n",
      "|4785099261031219201|false|1691129100|  [创, 年来, 新高, 英国, 央...|\n",
      "|4785099261043802113|false|1691129040|[欧盟, 对白, 白俄, 白俄罗斯...|\n",
      "|4785099261052190721|false|1691128860|  [涿州, 救援, 见闻, 希望, ...|\n",
      "|4785099261064773633|false|1691136300|   [东北, 华北, 等, 地, 有,...|\n",
      "|4785099261073162241|false|1691140140| [特斯, 特斯拉, 斯拉, 召回,...|\n",
      "|4785099261085745153|false|1691139960| [媒体, 称意, 意大利, 大利,...|\n",
      "|4785099261094133761|false|1691139960|  [国家, 矿山, 安全, 监察, ...|\n",
      "|4785099261106716673|false|1691139900|  [东, 淀, 蓄, 滞洪, 滞洪区...|\n",
      "|4785099261115105281|false|1691139780|  [汽车, 被, 暴雨, 冲走, 丢...|\n",
      "|4785099261123493889|false|1691146680|  [降息, 降, 准, 是否, 还有...|\n",
      "|4785099261136076801|false|1691146620|[黑龙江, 黑龙江省, 龙江, 省...|\n",
      "|4785099261148659713|false|1691146380| [国家, 发改委, 政策, 效果,...|\n",
      "|4785099261157048321|false|1691146320|   [中新, 真, 探, 芒果, 有,...|\n",
      "|4785099261165436929|false|1691146260|[专访, 汇丰, 汇丰银行, 银行...|\n",
      "|4785099261178019841|false|1691146200|  [访, 解放, 解放军, 军中, ...|\n",
      "|4785099261186408449|false|1691145840|  [战友, 追忆, 牺牲, 消防, ...|\n",
      "|4785099261198991361|false|1691145240|  [世界, 计算, 大会, 月, 日...|\n",
      "|4785099261207379969|false|1691145240|  [国家, 发展, 改革, 委, 政...|\n",
      "|4785099261219962881|false|1691145240| [中消协, 消协, 点名, 名家,...|\n",
      "|4785099261228351489|false|1691145060|  [外, 媒, 伊斯, 伊斯兰, 国...|\n",
      "+-------------------+-----+----------+------------------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                \r"
     ]
    }
   ],
   "source": [
    "s = spark.createDataFrame(s_list)\n",
    "print(s)\n",
    "s.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DataFrame[id: string, ifE: boolean, time: string, title: array<string>]\n",
      "+-------------------+----+----------+------------------------------+\n",
      "|                 id| ifE|      time|                         title|\n",
      "+-------------------+----+----------+------------------------------+\n",
      "|4784374250670653441|true|1690880880|  [云南, 南曲, 曲靖, 沾益, ...|\n",
      "|4784374250679042049|true|1690880760|  [炽热, 烽火, 照亮, 奋飞, ...|\n",
      "|4784374250708402177|true|1690880520|   [我, 是, 世代, 后, 巡护,...|\n",
      "|4784374250720985089|true|1690887420|  [跨越, 时空, 的, 八一, 追...|\n",
      "|4784374250733568001|true|1690887360|  [香港, 青年, 谈, 北上, 赴...|\n",
      "|4784374250741956609|true|1690887240|   [浙, 台, 棒, 垒球, 青少,...|\n",
      "|4784374250750345217|true|1690887120|  [多支, 力量, 参与, 救援, ...|\n",
      "|4784374250758733825|true|1690887120|  [暑运, 期间, 航空, 空出, ...|\n",
      "|4784374250767122433|true|1690887000|[澳门, 澳门大学, 大学, 连接...|\n",
      "|4784374250779705345|true|1690886400| [菜市, 菜市场, 市场, 成年,...|\n",
      "|4784374250788093953|true|1690885920|                  [八月, 你好]|\n",
      "|4784374250796482561|true|1690885860|  [中新, 真, 探, 方便, 方便...|\n",
      "|4784374250804871169|true|1690885800|[八一, 八一建军节, 建军, 建...|\n",
      "|4784374250817454081|true|1690885740|  [莫斯科, 再, 遭, 无人, 无...|\n",
      "|4784374250825842689|true|1690885740|  [最高, 最高检, 高检, 与, ...|\n",
      "|4784374250834231297|true|1690885500|  [鸟类, 叶, 食性, 如何, 起...|\n",
      "|4784374250842619905|true|1690885500| [青海, 海海, 海南, 海南州,...|\n",
      "|4784374250855202817|true|1690885500|[中央, 中央气象台, 气象, 气...|\n",
      "|4784374250863591425|true|1690885440|  [别, 被, 不确定性, 确定, ...|\n",
      "|4784374250876174337|true|1690885380|  [武警, 重庆, 总队, 举行, ...|\n",
      "+-------------------+----+----------+------------------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "e = spark.createDataFrame(e_list)\n",
    "print(e)\n",
    "e.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## word2vec 训练分词数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2023-08-04 08:46:02,346 WARN netlib.BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeSystemBLAS\n",
      "2023-08-04 08:46:02,351 WARN netlib.BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeRefBLAS\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----+--------------------+\n",
      "|word|              vector|\n",
      "+----+--------------------+\n",
      "|伙伴|[0.01174047868698...|\n",
      "|人物|[0.01103250589221...|\n",
      "|晋级|[0.01269659958779...|\n",
      "|确保|[-0.0020791813731...|\n",
      "|冠军|[-0.0299158431589...|\n",
      "|  被|[0.01582421921193...|\n",
      "|  较|[0.00116901216097...|\n",
      "|首枚|[0.00864995270967...|\n",
      "|球队|[0.00110156252048...|\n",
      "|约合|[0.00454104971140...|\n",
      "|  玟|[-0.0099446652457...|\n",
      "|  传|[8.83549626450985...|\n",
      "|长沙|[-0.0077146096155...|\n",
      "|我们|[0.03531184419989...|\n",
      "|共生|[-0.0175799056887...|\n",
      "|步枪|[0.00691888015717...|\n",
      "|贵宾|[-0.0134050510823...|\n",
      "|真的|[0.01538161747157...|\n",
      "|防控|[0.00355687248520...|\n",
      "|街头|[0.00203225295990...|\n",
      "+----+--------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from pyspark.ml.feature import Word2VecModel\n",
    "\n",
    "w2v_model_e = Word2VecModel.load(\"models/word2vec_model/python.word2vec\")\n",
    "vectors = w2v_model_e.getVectors()\n",
    "vectors.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 关键词获取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.ml.feature import CountVectorizerModel\n",
    "\n",
    "cv_model = CountVectorizerModel.load(\"models/CV.model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+-----+----------+------------------------------+--------------------+\n",
      "|                 id|  ifE|      time|                         title|       countFeatures|\n",
      "+-------------------+-----+----------+------------------------------+--------------------+\n",
      "|4785099261031219201|false|1691129100|  [创, 年来, 新高, 英国, 央...|(14094,[141,230,5...|\n",
      "|4785099261043802113|false|1691129040|[欧盟, 对白, 白俄, 白俄罗斯...|(14094,[14,536,76...|\n",
      "|4785099261052190721|false|1691128860|  [涿州, 救援, 见闻, 希望, ...|(14094,[608,741,7...|\n",
      "|4785099261064773633|false|1691136300|   [东北, 华北, 等, 地, 有,...|(14094,[7,48,51,9...|\n",
      "|4785099261073162241|false|1691140140| [特斯, 特斯拉, 斯拉, 召回,...|(14094,[14,184,26...|\n",
      "|4785099261085745153|false|1691139960| [媒体, 称意, 意大利, 大利,...|(14094,[116,276,4...|\n",
      "|4785099261094133761|false|1691139960|  [国家, 矿山, 安全, 监察, ...|(14094,[11,80,142...|\n",
      "|4785099261106716673|false|1691139900|  [东, 淀, 蓄, 滞洪, 滞洪区...|(14094,[437,600,8...|\n",
      "|4785099261115105281|false|1691139780|  [汽车, 被, 暴雨, 冲走, 丢...|(14094,[52,57,65,...|\n",
      "|4785099261123493889|false|1691146680|  [降息, 降, 准, 是否, 还有...|(14094,[276,396,1...|\n",
      "|4785099261136076801|false|1691146620|[黑龙江, 黑龙江省, 龙江, 省...|(14094,[29,86,236...|\n",
      "|4785099261148659713|false|1691146380| [国家, 发改委, 政策, 效果,...|(14094,[6,11,16,2...|\n",
      "|4785099261157048321|false|1691146320|   [中新, 真, 探, 芒果, 有,...|(14094,[51,298,41...|\n",
      "|4785099261165436929|false|1691146260|[专访, 汇丰, 汇丰银行, 银行...|(14094,[0,1,21,26...|\n",
      "|4785099261178019841|false|1691146200|  [访, 解放, 解放军, 军中, ...|(14094,[1,116,145...|\n",
      "|4785099261186408449|false|1691145840|  [战友, 追忆, 牺牲, 消防, ...|(14094,[23,306,34...|\n",
      "|4785099261198991361|false|1691145240|  [世界, 计算, 大会, 月, 日...|(14094,[2,19,22,5...|\n",
      "|4785099261207379969|false|1691145240|  [国家, 发展, 改革, 委, 政...|(14094,[3,11,21,7...|\n",
      "|4785099261219962881|false|1691145240| [中消协, 消协, 点名, 名家,...|(14094,[90,111,30...|\n",
      "|4785099261228351489|false|1691145060|  [外, 媒, 伊斯, 伊斯兰, 国...|(14094,[12,155,16...|\n",
      "+-------------------+-----+----------+------------------------------+--------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "cv_result_s = cv_model.transform(s)\n",
    "cv_result_s.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+----+----------+------------------------------+--------------------+\n",
      "|                 id| ifE|      time|                         title|       countFeatures|\n",
      "+-------------------+----+----------+------------------------------+--------------------+\n",
      "|4784374250670653441|true|1690880880|  [云南, 南曲, 曲靖, 沾益, ...|(14094,[94,99,125...|\n",
      "|4784374250679042049|true|1690880760|  [炽热, 烽火, 照亮, 奋飞, ...|(14094,[3396,4195...|\n",
      "|4784374250708402177|true|1690880520|   [我, 是, 世代, 后, 巡护,...|(14094,[105,123,1...|\n",
      "|4784374250720985089|true|1690887420|  [跨越, 时空, 的, 八一, 追...|(14094,[1,9,386,4...|\n",
      "|4784374250733568001|true|1690887360|  [香港, 青年, 谈, 北上, 赴...|(14094,[3,31,45,2...|\n",
      "|4784374250741956609|true|1690887240|   [浙, 台, 棒, 垒球, 青少,...|(14094,[53,75,83,...|\n",
      "|4784374250750345217|true|1690887120|  [多支, 力量, 参与, 救援, ...|(14094,[10,32,80,...|\n",
      "|4784374250758733825|true|1690887120|  [暑运, 期间, 航空, 空出, ...|(14094,[58,93,243...|\n",
      "|4784374250767122433|true|1690887000|[澳门, 澳门大学, 大学, 连接...|(14094,[74,95,362...|\n",
      "|4784374250779705345|true|1690886400| [菜市, 菜市场, 市场, 成年,...|(14094,[58,130,68...|\n",
      "|4784374250788093953|true|1690885920|                  [八月, 你好]|(14094,[2846,1103...|\n",
      "|4784374250796482561|true|1690885860|  [中新, 真, 探, 方便, 方便...|(14094,[261,298,4...|\n",
      "|4784374250804871169|true|1690885800|[八一, 八一建军节, 建军, 建...|(14094,[0,1,340,5...|\n",
      "|4784374250817454081|true|1690885740|  [莫斯科, 再, 遭, 无人, 无...|(14094,[32,169,40...|\n",
      "|4784374250825842689|true|1690885740|  [最高, 最高检, 高检, 与, ...|(14094,[23,29,145...|\n",
      "|4784374250834231297|true|1690885500|  [鸟类, 叶, 食性, 如何, 起...|(14094,[0,12,76,1...|\n",
      "|4784374250842619905|true|1690885500| [青海, 海海, 海南, 海南州,...|(14094,[22,72,186...|\n",
      "|4784374250855202817|true|1690885500|[中央, 中央气象台, 气象, 气...|(14094,[29,86,114...|\n",
      "|4784374250863591425|true|1690885440|  [别, 被, 不确定性, 确定, ...|(14094,[36,52,170...|\n",
      "|4784374250876174337|true|1690885380|  [武警, 重庆, 总队, 举行, ...|(14094,[22,109,18...|\n",
      "+-------------------+----+----------+------------------------------+--------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "cv_result_e = cv_model.transform(e)\n",
    "cv_result_e.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.ml.feature import IDFModel\n",
    "\n",
    "idf_model = IDFModel.load(\"models/IDF.model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+----+----------+------------------------------+--------------------+--------------------+\n",
      "|                 id| ifE|      time|                         title|       countFeatures|         idfFeatures|\n",
      "+-------------------+----+----------+------------------------------+--------------------+--------------------+\n",
      "|4784374250670653441|true|1690880880|  [云南, 南曲, 曲靖, 沾益, ...|(14094,[94,99,125...|(14094,[94,99,125...|\n",
      "|4784374250679042049|true|1690880760|  [炽热, 烽火, 照亮, 奋飞, ...|(14094,[3396,4195...|(14094,[3396,4195...|\n",
      "|4784374250708402177|true|1690880520|   [我, 是, 世代, 后, 巡护,...|(14094,[105,123,1...|(14094,[105,123,1...|\n",
      "|4784374250720985089|true|1690887420|  [跨越, 时空, 的, 八一, 追...|(14094,[1,9,386,4...|(14094,[1,9,386,4...|\n",
      "|4784374250733568001|true|1690887360|  [香港, 青年, 谈, 北上, 赴...|(14094,[3,31,45,2...|(14094,[3,31,45,2...|\n",
      "|4784374250741956609|true|1690887240|   [浙, 台, 棒, 垒球, 青少,...|(14094,[53,75,83,...|(14094,[53,75,83,...|\n",
      "|4784374250750345217|true|1690887120|  [多支, 力量, 参与, 救援, ...|(14094,[10,32,80,...|(14094,[10,32,80,...|\n",
      "|4784374250758733825|true|1690887120|  [暑运, 期间, 航空, 空出, ...|(14094,[58,93,243...|(14094,[58,93,243...|\n",
      "|4784374250767122433|true|1690887000|[澳门, 澳门大学, 大学, 连接...|(14094,[74,95,362...|(14094,[74,95,362...|\n",
      "|4784374250779705345|true|1690886400| [菜市, 菜市场, 市场, 成年,...|(14094,[58,130,68...|(14094,[58,130,68...|\n",
      "|4784374250788093953|true|1690885920|                  [八月, 你好]|(14094,[2846,1103...|(14094,[2846,1103...|\n",
      "|4784374250796482561|true|1690885860|  [中新, 真, 探, 方便, 方便...|(14094,[261,298,4...|(14094,[261,298,4...|\n",
      "|4784374250804871169|true|1690885800|[八一, 八一建军节, 建军, 建...|(14094,[0,1,340,5...|(14094,[0,1,340,5...|\n",
      "|4784374250817454081|true|1690885740|  [莫斯科, 再, 遭, 无人, 无...|(14094,[32,169,40...|(14094,[32,169,40...|\n",
      "|4784374250825842689|true|1690885740|  [最高, 最高检, 高检, 与, ...|(14094,[23,29,145...|(14094,[23,29,145...|\n",
      "|4784374250834231297|true|1690885500|  [鸟类, 叶, 食性, 如何, 起...|(14094,[0,12,76,1...|(14094,[0,12,76,1...|\n",
      "|4784374250842619905|true|1690885500| [青海, 海海, 海南, 海南州,...|(14094,[22,72,186...|(14094,[22,72,186...|\n",
      "|4784374250855202817|true|1690885500|[中央, 中央气象台, 气象, 气...|(14094,[29,86,114...|(14094,[29,86,114...|\n",
      "|4784374250863591425|true|1690885440|  [别, 被, 不确定性, 确定, ...|(14094,[36,52,170...|(14094,[36,52,170...|\n",
      "|4784374250876174337|true|1690885380|  [武警, 重庆, 总队, 举行, ...|(14094,[22,109,18...|(14094,[22,109,18...|\n",
      "+-------------------+----+----------+------------------------------+--------------------+--------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "tfidf_result_e = idf_model.transform(cv_result_e)\n",
    "tfidf_result_e.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+-----+----------+------------------------------+--------------------+--------------------+\n",
      "|                 id|  ifE|      time|                         title|       countFeatures|         idfFeatures|\n",
      "+-------------------+-----+----------+------------------------------+--------------------+--------------------+\n",
      "|4785099261031219201|false|1691129100|  [创, 年来, 新高, 英国, 央...|(14094,[141,230,5...|(14094,[141,230,5...|\n",
      "|4785099261043802113|false|1691129040|[欧盟, 对白, 白俄, 白俄罗斯...|(14094,[14,536,76...|(14094,[14,536,76...|\n",
      "|4785099261052190721|false|1691128860|  [涿州, 救援, 见闻, 希望, ...|(14094,[608,741,7...|(14094,[608,741,7...|\n",
      "|4785099261064773633|false|1691136300|   [东北, 华北, 等, 地, 有,...|(14094,[7,48,51,9...|(14094,[7,48,51,9...|\n",
      "|4785099261073162241|false|1691140140| [特斯, 特斯拉, 斯拉, 召回,...|(14094,[14,184,26...|(14094,[14,184,26...|\n",
      "|4785099261085745153|false|1691139960| [媒体, 称意, 意大利, 大利,...|(14094,[116,276,4...|(14094,[116,276,4...|\n",
      "|4785099261094133761|false|1691139960|  [国家, 矿山, 安全, 监察, ...|(14094,[11,80,142...|(14094,[11,80,142...|\n",
      "|4785099261106716673|false|1691139900|  [东, 淀, 蓄, 滞洪, 滞洪区...|(14094,[437,600,8...|(14094,[437,600,8...|\n",
      "|4785099261115105281|false|1691139780|  [汽车, 被, 暴雨, 冲走, 丢...|(14094,[52,57,65,...|(14094,[52,57,65,...|\n",
      "|4785099261123493889|false|1691146680|  [降息, 降, 准, 是否, 还有...|(14094,[276,396,1...|(14094,[276,396,1...|\n",
      "|4785099261136076801|false|1691146620|[黑龙江, 黑龙江省, 龙江, 省...|(14094,[29,86,236...|(14094,[29,86,236...|\n",
      "|4785099261148659713|false|1691146380| [国家, 发改委, 政策, 效果,...|(14094,[6,11,16,2...|(14094,[6,11,16,2...|\n",
      "|4785099261157048321|false|1691146320|   [中新, 真, 探, 芒果, 有,...|(14094,[51,298,41...|(14094,[51,298,41...|\n",
      "|4785099261165436929|false|1691146260|[专访, 汇丰, 汇丰银行, 银行...|(14094,[0,1,21,26...|(14094,[0,1,21,26...|\n",
      "|4785099261178019841|false|1691146200|  [访, 解放, 解放军, 军中, ...|(14094,[1,116,145...|(14094,[1,116,145...|\n",
      "|4785099261186408449|false|1691145840|  [战友, 追忆, 牺牲, 消防, ...|(14094,[23,306,34...|(14094,[23,306,34...|\n",
      "|4785099261198991361|false|1691145240|  [世界, 计算, 大会, 月, 日...|(14094,[2,19,22,5...|(14094,[2,19,22,5...|\n",
      "|4785099261207379969|false|1691145240|  [国家, 发展, 改革, 委, 政...|(14094,[3,11,21,7...|(14094,[3,11,21,7...|\n",
      "|4785099261219962881|false|1691145240| [中消协, 消协, 点名, 名家,...|(14094,[90,111,30...|(14094,[90,111,30...|\n",
      "|4785099261228351489|false|1691145060|  [外, 媒, 伊斯, 伊斯兰, 国...|(14094,[12,155,16...|(14094,[12,155,16...|\n",
      "+-------------------+-----+----------+------------------------------+--------------------+--------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "tfidf_result_s = idf_model.transform(cv_result_s)\n",
    "tfidf_result_s.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def sort_by_tfidf(partition):\n",
    "    TOPK = 5\n",
    "    for row in partition:\n",
    "        _dict = list(zip(row.idfFeatures.indices, row.idfFeatures.values))\n",
    "        _dict = sorted(_dict, key=lambda x: x[1], reverse=True)\n",
    "        result = _dict[:TOPK]\n",
    "        for word_index, tfidf in result:\n",
    "            yield row.id, int(word_index), round(float(tfidf), 4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                \r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+-----+-------+\n",
      "|                 id|index|weights|\n",
      "+-------------------+-----+-------+\n",
      "|4784374250670653441| 8216| 7.6121|\n",
      "|4784374250670653441| 9662| 7.6121|\n",
      "|4784374250670653441|11550| 7.6121|\n",
      "|4784374250670653441|12679| 7.6121|\n",
      "|4784374250670653441|13587| 7.6121|\n",
      "|4784374250679042049| 7909| 7.6121|\n",
      "|4784374250679042049| 8919| 7.6121|\n",
      "|4784374250679042049| 9733| 7.6121|\n",
      "|4784374250679042049|10908| 7.6121|\n",
      "|4784374250679042049| 3396| 6.9189|\n",
      "|4784374250708402177|12086| 7.6121|\n",
      "|4784374250708402177| 4765| 7.2066|\n",
      "|4784374250708402177| 4835| 7.2066|\n",
      "|4784374250708402177| 4906| 7.2066|\n",
      "|4784374250708402177| 3346| 6.9189|\n",
      "|4784374250720985089| 7352| 7.6121|\n",
      "|4784374250720985089|12991| 7.6121|\n",
      "|4784374250720985089|13224| 7.6121|\n",
      "|4784374250720985089| 4465| 7.2066|\n",
      "|4784374250720985089| 4982| 7.2066|\n",
      "+-------------------+-----+-------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "keywords_by_tfidf_e = tfidf_result_e.rdd.mapPartitions(sort_by_tfidf).toDF([\"id\", \"index\", \"weights\"])\n",
    "keywords_by_tfidf_e.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+-----+-------+\n",
      "|                 id|index|weights|\n",
      "+-------------------+-----+-------+\n",
      "|4785099261031219201| 3735| 6.9189|\n",
      "|4785099261031219201| 2140| 6.5135|\n",
      "|4785099261031219201| 1677| 6.3593|\n",
      "|4785099261031219201| 1711| 6.3593|\n",
      "|4785099261031219201| 1752| 6.3593|\n",
      "|4785099261043802113| 5448| 7.2066|\n",
      "|4785099261043802113| 6022| 7.2066|\n",
      "|4785099261043802113| 3337| 6.9189|\n",
      "|4785099261043802113|  973| 5.9073|\n",
      "|4785099261043802113|  994| 5.9073|\n",
      "|4785099261052190721| 7924| 7.6121|\n",
      "|4785099261052190721|  608| 5.6662|\n",
      "|4785099261052190721|  741| 5.6662|\n",
      "|4785099261064773633|   90| 8.6699|\n",
      "|4785099261064773633| 1925| 6.3593|\n",
      "|4785099261064773633| 1349|  6.108|\n",
      "|4785099261064773633| 1090| 6.0027|\n",
      "|4785099261064773633| 1201| 6.0027|\n",
      "|4785099261073162241| 6748| 7.6121|\n",
      "|4785099261073162241| 2363| 6.5135|\n",
      "+-------------------+-----+-------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "keywords_by_tfidf_s = tfidf_result_s.rdd.mapPartitions(sort_by_tfidf).toDF([\"id\", \"index\", \"weights\"])\n",
    "keywords_by_tfidf_s.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['中国', 1.9387663817611542, 0],\n",
       " ['的', 2.2793708556672776, 1],\n",
       " ['在', 2.581651727540211, 2],\n",
       " ['发展', 2.7797838903608083, 3],\n",
       " ['大运', 3.0119320047680995, 4],\n",
       " ['成都', 3.0425466405877066, 5],\n",
       " ['将', 3.1347528344544404, 6],\n",
       " ['台风', 3.223832464508129, 7],\n",
       " ['大运会', 3.2618117125733455, 8],\n",
       " ['人', 3.4148877012708385, 9],\n",
       " ['北京', 3.356476939114424, 10],\n",
       " ['国家', 3.342392199232685, 11],\n",
       " ['新', 3.4300395062914406, 12],\n",
       " ['年', 3.377983144335387, 13],\n",
       " ['和', 3.385255903664467, 14],\n",
       " ['苏', 3.377983144335387, 15],\n",
       " ['半年', 3.377983144335387, 16],\n",
       " ['文化', 3.3999620510541626, 17],\n",
       " ['杜', 3.39258194375654, 18],\n",
       " ['世界', 3.44542442513092, 19],\n",
       " ['芮', 3.4073970295416807, 20],\n",
       " ['经济', 3.5345522050269276, 21],\n",
       " ['举行', 3.453206565572975, 22],\n",
       " ['与', 3.4769230921902907, 23],\n",
       " ['上半年', 3.5345522050269276, 24],\n",
       " ['上半', 3.5345522050269276, 25],\n",
       " ['为', 3.595706628180258, 26],\n",
       " ['国际', 3.6047564637001757, 27],\n",
       " ['产业', 3.6231056023683723, 28],\n",
       " ['发布', 3.6231056023683723, 29],\n",
       " ['福建', 3.632407995030686, 30],\n",
       " ['香港', 3.700066643504501, 31],\n",
       " ['已', 3.6608459303512193, 32],\n",
       " ['开幕', 3.7408886380247557, 33],\n",
       " ['启动', 3.6705078412629564, 34],\n",
       " ['习近平', 3.6705078412629564, 35],\n",
       " ['企业', 3.7408886380247557, 36],\n",
       " ['全国', 3.761942047222588, 37],\n",
       " ['新疆', 3.816600459760452, 38],\n",
       " ['浙江', 3.8393287108380085, 39],\n",
       " ['之', 3.8744200306492784, 40],\n",
       " ['质量', 3.9357889770255707, 41],\n",
       " ['建设', 3.910787674820153, 42],\n",
       " ['高质', 3.961431407638908, 43],\n",
       " ['高质量', 3.961431407638908, 44],\n",
       " ['青年', 3.9232101948187106, 45],\n",
       " ['应急', 4.014777388344201, 46],\n",
       " ['超', 3.974503489206261, 47],\n",
       " ['降雨', 4.042556952451276, 48],\n",
       " ['亿元', 3.9877487159562817, 49],\n",
       " ['月', 4.042556952451276, 50],\n",
       " ['有', 4.071130324895333, 51],\n",
       " ['被', 4.042556952451276, 52],\n",
       " ['运动', 4.056741587443233, 53],\n",
       " ['响应', 4.100544210101626, 54],\n",
       " ['学生', 4.0285707104765365, 55],\n",
       " ['上海', 4.100544210101626, 56],\n",
       " ['暴雨', 4.2621855616580415, 57],\n",
       " ['市场', 4.071130324895333, 58],\n",
       " ['增长', 4.085729124316486, 59],\n",
       " ['服务', 4.115582087466167, 60],\n",
       " ['活动', 4.071130324895333, 61],\n",
       " ['乡村', 4.14635374613292, 62],\n",
       " ['多', 4.085729124316486, 63],\n",
       " ['工作', 4.100544210101626, 64],\n",
       " ['专家', 4.085729124316486, 65],\n",
       " ['致', 4.115582087466167, 66],\n",
       " ['一', 4.130849559596955, 67],\n",
       " ['美国', 4.1781024444475, 68],\n",
       " ['创新', 4.14635374613292, 69],\n",
       " ['推动', 4.14635374613292, 70],\n",
       " ['女足', 4.476595433003497, 71],\n",
       " ['青海', 4.162102103101059, 72],\n",
       " ['交流', 4.19436296531928, 73],\n",
       " ['大学', 4.227699385586873, 74],\n",
       " ['少年', 4.19436296531928, 75],\n",
       " ['对', 4.19436296531928, 76],\n",
       " ['旅游', 4.2621855616580415, 77],\n",
       " ['持续', 4.210892267270491, 78],\n",
       " ['举办', 4.210892267270491, 79],\n",
       " ['安全', 4.316252782928317, 80],\n",
       " ['今年', 4.227699385586873, 81],\n",
       " ['山东', 4.2979036442601215, 82],\n",
       " ['台', 4.3734111967682665, 83],\n",
       " ['广西', 4.279885138757443, 84],\n",
       " ['人民', 4.33494491594047, 85],\n",
       " ['预警', 4.279885138757443, 86],\n",
       " ['助力', 4.2979036442601215, 87],\n",
       " ['青少年', 4.2979036442601215, 88],\n",
       " ['日', 4.353993110911165, 89],\n",
       " ['等', 4.33494491594047, 90],\n",
       " ['青少', 4.2979036442601215, 91],\n",
       " ['向', 4.316252782928317, 92],\n",
       " ['提升', 4.33494491594047, 93],\n",
       " ['影响', 4.316252782928317, 94],\n",
       " ['项目', 4.316252782928317, 95],\n",
       " ['到', 4.316252782928317, 96],\n",
       " ['以', 4.3734111967682665, 97],\n",
       " ['旅', 4.33494491594047, 98],\n",
       " ['级', 4.3734111967682665, 99],\n",
       " ['如何', 4.33494491594047, 100],\n",
       " ['丨', 4.33494491594047, 101],\n",
       " ['书记', 4.353993110911165, 102],\n",
       " ['同比', 4.3734111967682665, 103],\n",
       " ['行', 4.3734111967682665, 104],\n",
       " ['是', 4.353993110911165, 105],\n",
       " ['广东', 4.393213824064446, 106],\n",
       " ['了', 4.393213824064446, 107],\n",
       " ['山西', 4.413416531381966, 108],\n",
       " ['重庆', 4.413416531381966, 109],\n",
       " ['全球', 4.3734111967682665, 110],\n",
       " ['消费', 4.498574339722272, 111],\n",
       " ['米', 4.393213824064446, 112],\n",
       " ['科技', 4.476595433003497, 113],\n",
       " ['中央', 4.455089227782533, 114],\n",
       " ['强降雨', 4.413416531381966, 115],\n",
       " ['合作', 4.413416531381966, 116],\n",
       " ['调查', 4.455089227782533, 117],\n",
       " ['美', 4.434035818584701, 118],\n",
       " ['生态', 4.498574339722272, 119],\n",
       " ['网络', 4.476595433003497, 120],\n",
       " ['江西', 4.455089227782533, 121],\n",
       " ['打造', 4.434035818584701, 122],\n",
       " ['城市', 4.498574339722272, 123],\n",
       " ['四川', 4.498574339722272, 124],\n",
       " ['发生', 4.455089227782533, 125],\n",
       " ['台湾', 4.455089227782533, 126],\n",
       " ['推进', 4.455089227782533, 127],\n",
       " ['两岸', 4.498574339722272, 128],\n",
       " ['从', 4.476595433003497, 129],\n",
       " ['地', 4.498574339722272, 130],\n",
       " ['传承', 4.476595433003497, 131],\n",
       " ['让', 4.476595433003497, 132],\n",
       " ['保护', 4.498574339722272, 133],\n",
       " ['原', 4.54403671379903, 134],\n",
       " ['比增', 4.521047195574331, 135],\n",
       " ['绿色', 4.521047195574331, 136],\n",
       " ['体育', 4.6416751833629455, 137],\n",
       " ['第', 4.521047195574331, 138],\n",
       " ['港澳', 4.521047195574331, 139],\n",
       " ['世界杯', 4.616357375378656, 140],\n",
       " ['创', 4.567567211209224, 141],\n",
       " ['大', 4.521047195574331, 142],\n",
       " ['好', 4.567567211209224, 143],\n",
       " ['科学', 4.749888768003179, 144],\n",
       " ['联合', 4.54403671379903, 145],\n",
       " ['大学生', 4.54403671379903, 146],\n",
       " ['地区', 4.54403671379903, 147],\n",
       " ['调研', 4.567567211209224, 148],\n",
       " ['杭州', 4.6416751833629455, 149],\n",
       " ['来', 4.54403671379903, 150],\n",
       " ['中', 4.54403671379903, 151],\n",
       " ['后', 4.567567211209224, 152],\n",
       " ['研究', 4.616357375378656, 153],\n",
       " ['纪事', 4.567567211209224, 154],\n",
       " ['死亡', 4.591664762788284, 155],\n",
       " ['开展', 4.591664762788284, 156],\n",
       " ['中华', 4.616357375378656, 157],\n",
       " ['历史', 4.616357375378656, 158],\n",
       " ['上', 4.667650669766206, 159],\n",
       " ['大会', 4.616357375378656, 160],\n",
       " ['共', 4.616357375378656, 161],\n",
       " ['媒', 4.6416751833629455, 162],\n",
       " ['逾', 4.6416751833629455, 163],\n",
       " ['总统', 4.6416751833629455, 164],\n",
       " ['文', 4.667650669766206, 165],\n",
       " ['走进', 4.6416751833629455, 166],\n",
       " ['万', 4.721717891036482, 167],\n",
       " ['论坛', 4.6416751833629455, 168],\n",
       " ['再', 4.6416751833629455, 169],\n",
       " ['亮相', 4.667650669766206, 170],\n",
       " ['人员', 4.749888768003179, 171],\n",
       " ['环境', 4.694318916848368, 172],\n",
       " ['艺术', 4.749888768003179, 173],\n",
       " ['小', 4.667650669766206, 174],\n",
       " ['能', 4.694318916848368, 175],\n",
       " ['代表', 4.667650669766206, 176],\n",
       " ['能源', 4.721717891036482, 177],\n",
       " ['强台风', 4.694318916848368, 178],\n",
       " ['河北', 4.694318916848368, 179],\n",
       " ['名', 4.694318916848368, 180],\n",
       " ['三', 4.694318916848368, 181],\n",
       " ['贵州', 4.694318916848368, 182],\n",
       " ['男子', 4.694318916848368, 183],\n",
       " ['汽车', 4.77887630487643, 184],\n",
       " ['出口', 4.749888768003179, 185],\n",
       " ['周年', 4.721717891036482, 186],\n",
       " ['第一', 4.721717891036482, 187],\n",
       " ['动员', 4.749888768003179, 188],\n",
       " ['迎', 4.721717891036482, 189],\n",
       " ['港', 4.749888768003179, 190],\n",
       " ['首次', 4.721717891036482, 191],\n",
       " ['日本', 4.749888768003179, 192],\n",
       " ['就业', 4.8712496250074455, 193],\n",
       " ['融合', 4.721717891036482, 194],\n",
       " ['文明', 4.77887630487643, 195],\n",
       " ['管理', 4.77887630487643, 196],\n",
       " ['第二', 4.749888768003179, 197],\n",
       " ['实现', 4.749888768003179, 198],\n",
       " ['防', 4.749888768003179, 199],\n",
       " ['民营', 4.8712496250074455, 200],\n",
       " ['湖南', 4.749888768003179, 201],\n",
       " ['世锦赛', 4.808729268026112, 202],\n",
       " ['体验', 4.77887630487643, 203],\n",
       " ['投资', 4.839500926692866, 204],\n",
       " ['教育', 4.808729268026112, 205],\n",
       " ['获', 4.77887630487643, 206],\n",
       " ['用', 4.808729268026112, 207],\n",
       " ['社会', 4.808729268026112, 208],\n",
       " ['达', 4.77887630487643, 209],\n",
       " ['农业', 4.904039447830437, 210],\n",
       " ['或', 4.77887630487643, 211],\n",
       " ['主席', 4.839500926692866, 212],\n",
       " ['亚运', 4.839500926692866, 213],\n",
       " ['振兴', 4.808729268026112, 214],\n",
       " ['数字', 4.808729268026112, 215],\n",
       " ['金', 4.839500926692866, 216],\n",
       " ['会见', 4.808729268026112, 217],\n",
       " ['博物', 4.8712496250074455, 218],\n",
       " ['天津', 4.8712496250074455, 219],\n",
       " ['届', 4.808729268026112, 220],\n",
       " ['非', 4.937940999506118, 221],\n",
       " ['个', 4.808729268026112, 222],\n",
       " ['运动员', 4.8712496250074455, 223],\n",
       " ['游泳', 4.8712496250074455, 224],\n",
       " ['第三', 4.839500926692866, 225],\n",
       " ['冠军', 4.973032319317388, 226],\n",
       " ['青春', 4.8712496250074455, 227],\n",
       " ['计划', 4.839500926692866, 228],\n",
       " ['碳', 4.973032319317388, 229],\n",
       " ['至', 4.8712496250074455, 230],\n",
       " ['政策', 4.839500926692866, 231],\n",
       " ['成立', 4.839500926692866, 232],\n",
       " ['约', 4.839500926692866, 233],\n",
       " ['时代', 4.839500926692866, 234],\n",
       " ['接受', 4.839500926692866, 235],\n",
       " ['红色', 4.839500926692866, 236],\n",
       " ['民族', 4.904039447830437, 237],\n",
       " ['云南', 4.8712496250074455, 238],\n",
       " ['完成', 4.8712496250074455, 239],\n",
       " ['青', 5.009399963488263, 240],\n",
       " ['问', 4.8712496250074455, 241],\n",
       " ['中心', 4.8712496250074455, 242],\n",
       " ['航空', 5.04714029147111, 243],\n",
       " ['安徽', 4.8712496250074455, 244],\n",
       " ['报告', 4.8712496250074455, 245],\n",
       " ['博物馆', 4.937940999506118, 246],\n",
       " ['规模', 4.904039447830437, 247],\n",
       " ['保障', 4.8712496250074455, 248],\n",
       " ['平台', 4.8712496250074455, 249],\n",
       " ['灾害', 4.904039447830437, 250],\n",
       " ['进入', 4.904039447830437, 251],\n",
       " ['选手', 4.904039447830437, 252],\n",
       " ['重要', 4.904039447830437, 253],\n",
       " ['于', 4.904039447830437, 254],\n",
       " ['俄', 5.214194376134277, 255],\n",
       " ['发改委', 4.904039447830437, 256],\n",
       " ['金融', 5.009399963488263, 257],\n",
       " ['故事', 4.904039447830437, 258],\n",
       " ['支持', 4.904039447830437, 259],\n",
       " ['东西', 4.937940999506118, 260],\n",
       " ['都', 4.904039447830437, 261],\n",
       " ['海洋', 4.904039447830437, 262],\n",
       " ['部分', 4.973032319317388, 263],\n",
       " ['聚焦', 4.937940999506118, 264],\n",
       " ['赴', 4.937940999506118, 265],\n",
       " ['开放', 4.973032319317388, 266],\n",
       " ['辽宁', 4.973032319317388, 267],\n",
       " ['江苏', 4.973032319317388, 268],\n",
       " ['部门', 4.973032319317388, 269],\n",
       " ['开幕式', 4.973032319317388, 270],\n",
       " ['防汛', 5.04714029147111, 271],\n",
       " ['近', 4.937940999506118, 272],\n",
       " ['广州', 4.937940999506118, 273],\n",
       " ['不', 4.973032319317388, 274],\n",
       " ['赋', 4.937940999506118, 275],\n",
       " ['回应', 4.973032319317388, 276],\n",
       " ['制造', 5.009399963488263, 277],\n",
       " ['张', 5.04714029147111, 278],\n",
       " ['学习', 5.009399963488263, 279],\n",
       " ['促进', 4.973032319317388, 280],\n",
       " ['产品', 5.009399963488263, 281],\n",
       " ['升级', 4.973032319317388, 282],\n",
       " ['加强', 4.973032319317388, 283],\n",
       " ['开赛', 4.973032319317388, 284],\n",
       " ['出席', 4.973032319317388, 285],\n",
       " ['探访', 4.973032319317388, 286],\n",
       " ['副', 5.04714029147111, 287],\n",
       " ['技术', 5.009399963488263, 288],\n",
       " ['遗', 5.086361004624392, 289],\n",
       " ['现代', 4.973032319317388, 290],\n",
       " ['就', 4.973032319317388, 291],\n",
       " ['蒙古', 5.009399963488263, 292],\n",
       " ['女子', 4.973032319317388, 293],\n",
       " ['西藏', 5.04714029147111, 294],\n",
       " ['会议', 5.1271829991446465, 295],\n",
       " ['审查', 5.009399963488263, 296],\n",
       " ['沿海', 5.009399963488263, 297],\n",
       " ['探', 5.009399963488263, 298],\n",
       " ['海南', 5.009399963488263, 299],\n",
       " ['聚', 5.009399963488263, 300],\n",
       " ['铁路', 5.009399963488263, 301],\n",
       " ['推出', 5.009399963488263, 302],\n",
       " ['超强', 5.009399963488263, 303],\n",
       " ['卡', 5.009399963488263, 304],\n",
       " ['金牌', 5.04714029147111, 305],\n",
       " ['助', 5.04714029147111, 306],\n",
       " ['特色', 5.086361004624392, 307],\n",
       " ['海外', 5.009399963488263, 308],\n",
       " ['登陆', 5.009399963488263, 309],\n",
       " ['工业', 5.04714029147111, 310],\n",
       " ['总书记', 5.009399963488263, 311],\n",
       " ['游', 5.086361004624392, 312],\n",
       " ['力量', 5.04714029147111, 313],\n",
       " ['全面', 5.04714029147111, 314],\n",
       " ['资源', 5.169742613563442, 315],\n",
       " ['自然', 5.1271829991446465, 316],\n",
       " ['行业', 5.04714029147111, 317],\n",
       " ['运行', 5.04714029147111, 318],\n",
       " ['首个', 5.04714029147111, 319],\n",
       " ['集团', 5.04714029147111, 320],\n",
       " ['乌', 5.169742613563442, 321],\n",
       " ['驻', 5.04714029147111, 322],\n",
       " ['起', 5.04714029147111, 323],\n",
       " ['王', 5.04714029147111, 324],\n",
       " ['学者', 5.04714029147111, 325],\n",
       " ['恢复', 5.04714029147111, 326],\n",
       " ['村', 5.169742613563442, 327],\n",
       " ['农村', 5.169742613563442, 328],\n",
       " ['气象', 5.169742613563442, 329],\n",
       " ['受', 5.04714029147111, 330],\n",
       " ['工程', 5.086361004624392, 331],\n",
       " ['正式', 5.086361004624392, 332],\n",
       " ['生活', 5.086361004624392, 333],\n",
       " ['及', 5.086361004624392, 334],\n",
       " ['我国', 5.086361004624392, 335],\n",
       " ['首届', 5.086361004624392, 336],\n",
       " ['甘肃', 5.086361004624392, 337],\n",
       " ['警方', 5.086361004624392, 338],\n",
       " ['传统', 5.086361004624392, 339],\n",
       " ['这', 5.086361004624392, 340],\n",
       " ['运动会', 5.1271829991446465, 341],\n",
       " ['内蒙', 5.1271829991446465, 342],\n",
       " ['探索', 5.1271829991446465, 343],\n",
       " ['之路', 5.169742613563442, 344],\n",
       " ['决赛', 5.169742613563442, 345],\n",
       " ['文物', 5.1271829991446465, 346],\n",
       " ['消防', 5.214194376134277, 347],\n",
       " ['夏季', 5.1271829991446465, 348],\n",
       " ['洪水', 5.214194376134277, 349],\n",
       " ['西南', 5.1271829991446465, 350],\n",
       " ['内蒙古', 5.1271829991446465, 351],\n",
       " ['长春', 5.169742613563442, 352],\n",
       " ['正', 5.1271829991446465, 353],\n",
       " ['最大', 5.1271829991446465, 354],\n",
       " ['防御', 5.169742613563442, 355],\n",
       " ['未来', 5.214194376134277, 356],\n",
       " ['新能源', 5.1271829991446465, 357],\n",
       " ['中国队', 5.1271829991446465, 358],\n",
       " ['网', 5.1271829991446465, 359],\n",
       " ['人才', 5.169742613563442, 360],\n",
       " ['西部', 5.169742613563442, 361],\n",
       " ['澳门', 5.260714391769169, 362],\n",
       " ['智能', 5.169742613563442, 363],\n",
       " ['华文', 5.214194376134277, 364],\n",
       " ['我们', 5.214194376134277, 365],\n",
       " ['我', 5.309504555938601, 366],\n",
       " ['军事', 5.260714391769169, 367],\n",
       " ['累计', 5.169742613563442, 368],\n",
       " ['国人', 5.309504555938601, 369],\n",
       " ['齐', 5.169742613563442, 370],\n",
       " ['草原', 5.260714391769169, 371],\n",
       " ['行动', 5.169742613563442, 372],\n",
       " ['宣布', 5.169742613563442, 373],\n",
       " ['努', 5.169742613563442, 374],\n",
       " ['称', 5.214194376134277, 375],\n",
       " ['常委', 5.260714391769169, 376],\n",
       " ['第十', 5.169742613563442, 377],\n",
       " ['综合', 5.169742613563442, 378],\n",
       " ['情况', 5.169742613563442, 379],\n",
       " ['走', 5.169742613563442, 380],\n",
       " ['体系', 5.169742613563442, 381],\n",
       " ['全部', 5.214194376134277, 382],\n",
       " ['通报', 5.169742613563442, 383],\n",
       " ['提供', 5.169742613563442, 384],\n",
       " ['观察', 5.169742613563442, 385],\n",
       " ['南昌', 5.169742613563442, 386],\n",
       " ['天气', 5.169742613563442, 387],\n",
       " ['科', 5.260714391769169, 388],\n",
       " ['大赛', 5.214194376134277, 389],\n",
       " ['韩国', 5.260714391769169, 390],\n",
       " ['晋级', 5.214194376134277, 391],\n",
       " ['家', 5.214194376134277, 392],\n",
       " ['举措', 5.214194376134277, 393],\n",
       " ['儿童', 5.260714391769169, 394],\n",
       " ['中方', 5.214194376134277, 395],\n",
       " ['空间', 5.260714391769169, 396],\n",
       " ['感受', 5.214194376134277, 397],\n",
       " ['看', 5.260714391769169, 398],\n",
       " ['中外', 5.214194376134277, 399],\n",
       " ['政府', 5.214194376134277, 400],\n",
       " ['国防', 5.214194376134277, 401],\n",
       " ['空军', 5.309504555938601, 402],\n",
       " ['应对', 5.214194376134277, 403],\n",
       " ['成功', 5.214194376134277, 404],\n",
       " ['促', 5.214194376134277, 405],\n",
       " ['热', 5.214194376134277, 406],\n",
       " ['航线', 5.214194376134277, 407],\n",
       " ['福州', 5.214194376134277, 408],\n",
       " ['公园', 5.214194376134277, 409],\n",
       " ['队', 5.260714391769169, 410],\n",
       " ['继续', 5.260714391769169, 411],\n",
       " ['生产', 5.309504555938601, 412],\n",
       " ['精神', 5.260714391769169, 413],\n",
       " ['会', 5.260714391769169, 414],\n",
       " ['中新', 5.260714391769169, 415],\n",
       " ['美丽', 5.260714391769169, 416],\n",
       " ['开启', 5.260714391769169, 417],\n",
       " ['记者', 5.260714391769169, 418],\n",
       " ['事件', 5.260714391769169, 419],\n",
       " ['京津', 5.260714391769169, 420],\n",
       " ['守护', 5.309504555938601, 421],\n",
       " ['成', 5.260714391769169, 422],\n",
       " ['志愿', 5.3607978503261515, 423],\n",
       " ['新增', 5.260714391769169, 424],\n",
       " ['万亿', 5.260714391769169, 425],\n",
       " ['红', 5.309504555938601, 426],\n",
       " ['时', 5.309504555938601, 427],\n",
       " ['代表团', 5.260714391769169, 428],\n",
       " ['袭', 5.260714391769169, 429],\n",
       " ['委会', 5.3607978503261515, 430],\n",
       " ['口岸', 5.260714391769169, 431],\n",
       " ['暑期', 5.260714391769169, 432],\n",
       " ['六届', 5.260714391769169, 433],\n",
       " ['站', 5.309504555938601, 434],\n",
       " ['公司', 5.260714391769169, 435],\n",
       " ['突破', 5.260714391769169, 436],\n",
       " ['全力', 5.309504555938601, 437],\n",
       " ['中小', 5.309504555938601, 438],\n",
       " ['监管', 5.309504555938601, 439],\n",
       " ['大利', 5.309504555938601, 440],\n",
       " ['出', 5.3607978503261515, 441],\n",
       " ['生态环境', 5.3607978503261515, 442],\n",
       " ['京', 5.3607978503261515, 443],\n",
       " ['外交', 5.309504555938601, 444],\n",
       " ['国内', 5.309504555938601, 445],\n",
       " ['国大', 5.309504555938601, 446],\n",
       " ['防部', 5.309504555938601, 447],\n",
       " ['博会', 5.309504555938601, 448],\n",
       " ['锦标赛', 5.309504555938601, 449],\n",
       " ['世代', 5.309504555938601, 450],\n",
       " ['仍', 5.309504555938601, 451],\n",
       " ['列车', 5.309504555938601, 452],\n",
       " ['转型', 5.309504555938601, 453],\n",
       " ['官方', 5.309504555938601, 454],\n",
       " ['作品', 5.3607978503261515, 455],\n",
       " ['跨境', 5.309504555938601, 456],\n",
       " ['景区', 5.309504555938601, 457],\n",
       " ['京津冀', 5.309504555938601, 458],\n",
       " ['下', 5.309504555938601, 459],\n",
       " ['纪录', 5.309504555938601, 460],\n",
       " ['仪式', 5.309504555938601, 461],\n",
       " ['旅客', 5.309504555938601, 462],\n",
       " ['条', 5.309504555938601, 463],\n",
       " ['重大', 5.309504555938601, 464],\n",
       " ['锦标', 5.309504555938601, 465],\n",
       " ['陈', 5.309504555938601, 466],\n",
       " ['最新', 5.309504555938601, 467],\n",
       " ['学', 5.414865071596427, 468],\n",
       " ['评', 5.3607978503261515, 469],\n",
       " ['夏令营', 5.3607978503261515, 470],\n",
       " ['启幕', 5.3607978503261515, 471],\n",
       " ['进出口', 5.414865071596427, 472],\n",
       " ['机构', 5.414865071596427, 473],\n",
       " ['梦', 5.3607978503261515, 474],\n",
       " ['李', 5.3607978503261515, 475],\n",
       " ['赛事', 5.3607978503261515, 476],\n",
       " ['成为', 5.3607978503261515, 477],\n",
       " ['大使', 5.3607978503261515, 478],\n",
       " ['上演', 5.3607978503261515, 479],\n",
       " ['车', 5.414865071596427, 480],\n",
       " ['宁夏', 5.3607978503261515, 481],\n",
       " ['出现', 5.3607978503261515, 482],\n",
       " ['基本', 5.3607978503261515, 483],\n",
       " ['海', 5.3607978503261515, 484],\n",
       " ['第六', 5.3607978503261515, 485],\n",
       " ['东盟', 5.3607978503261515, 486],\n",
       " ['组织', 5.414865071596427, 487],\n",
       " ['进出', 5.414865071596427, 488],\n",
       " ['夏令', 5.3607978503261515, 489],\n",
       " ['军', 5.3607978503261515, 490],\n",
       " ['比赛', 5.414865071596427, 491],\n",
       " ['国', 5.3607978503261515, 492],\n",
       " ['国防部', 5.3607978503261515, 493],\n",
       " ['菲律宾', 5.414865071596427, 494],\n",
       " ['健康', 5.472023485436376, 495],\n",
       " ['公开', 5.414865071596427, 496],\n",
       " ['受审', 5.414865071596427, 497],\n",
       " ['亚运会', 5.414865071596427, 498],\n",
       " ['什么', 5.472023485436376, 499],\n",
       " ['最高', 5.414865071596427, 500],\n",
       " ['对话', 5.472023485436376, 501],\n",
       " ['秀', 5.414865071596427, 502],\n",
       " ['交通', 5.414865071596427, 503],\n",
       " ['陕西', 5.472023485436376, 504],\n",
       " ['进展', 5.414865071596427, 505],\n",
       " ['更', 5.532648107252811, 506],\n",
       " ['数据', 5.532648107252811, 507],\n",
       " ['带', 5.414865071596427, 508],\n",
       " ['结束', 5.414865071596427, 509],\n",
       " ['基地', 5.414865071596427, 510],\n",
       " ['坦', 5.414865071596427, 511],\n",
       " ['参与', 5.414865071596427, 512],\n",
       " ['强', 5.414865071596427, 513],\n",
       " ['系列', 5.414865071596427, 514],\n",
       " ['活力', 5.414865071596427, 515],\n",
       " ['两', 5.414865071596427, 516],\n",
       " ['游客', 5.414865071596427, 517],\n",
       " ['舞蹈', 5.597186628390382, 518],\n",
       " ['把', 5.472023485436376, 519],\n",
       " ['首', 5.472023485436376, 520],\n",
       " ['水平', 5.414865071596427, 521],\n",
       " ['美元', 5.414865071596427, 522],\n",
       " ['要', 5.414865071596427, 523],\n",
       " ['可', 5.414865071596427, 524],\n",
       " ['预计', 5.414865071596427, 525],\n",
       " ['第六届', 5.414865071596427, 526],\n",
       " ['利亚', 5.414865071596427, 527],\n",
       " ['夺得', 5.414865071596427, 528],\n",
       " ['带来', 5.414865071596427, 529],\n",
       " ['沈阳', 5.472023485436376, 530],\n",
       " ['军人', 5.414865071596427, 531],\n",
       " ['人大', 5.532648107252811, 532],\n",
       " ['直播', 5.472023485436376, 533],\n",
       " ['进', 5.532648107252811, 534],\n",
       " ['余', 5.414865071596427, 535],\n",
       " ['实施', 5.414865071596427, 536],\n",
       " ['获得', 5.414865071596427, 537],\n",
       " ['赛', 5.414865071596427, 538],\n",
       " ['基础', 5.597186628390382, 539],\n",
       " ['湾区', 5.472023485436376, 540],\n",
       " ['打击', 5.472023485436376, 541],\n",
       " ['雨', 5.472023485436376, 542],\n",
       " ['全', 5.472023485436376, 543],\n",
       " ['沪', 5.472023485436376, 544],\n",
       " ['华', 5.472023485436376, 545],\n",
       " ['中国女足', 5.472023485436376, 546],\n",
       " ['区', 5.472023485436376, 547],\n",
       " ['古', 5.532648107252811, 548],\n",
       " ['无人', 5.472023485436376, 549],\n",
       " ['学家', 5.666179499877334, 550],\n",
       " ['召开', 5.472023485436376, 551],\n",
       " ['足球', 5.666179499877334, 552],\n",
       " ['研', 5.472023485436376, 553],\n",
       " ['关系', 5.532648107252811, 554],\n",
       " ['水', 5.532648107252811, 555],\n",
       " ['亚洲', 5.532648107252811, 556],\n",
       " ['公益', 5.532648107252811, 557],\n",
       " ['夏日', 5.472023485436376, 558],\n",
       " ['事故', 5.472023485436376, 559],\n",
       " ['考察', 5.532648107252811, 560],\n",
       " ['共同', 5.472023485436376, 561],\n",
       " ['展', 5.472023485436376, 562],\n",
       " ['高温', 5.472023485436376, 563],\n",
       " ['稳定', 5.472023485436376, 564],\n",
       " ['一路', 5.472023485436376, 565],\n",
       " ['式', 5.472023485436376, 566],\n",
       " ['传播', 5.532648107252811, 567],\n",
       " ['音乐', 5.472023485436376, 568],\n",
       " ['关键', 5.472023485436376, 569],\n",
       " ['加大', 5.472023485436376, 570],\n",
       " ['迎来', 5.472023485436376, 571],\n",
       " ['现代化', 5.472023485436376, 572],\n",
       " ['企', 5.472023485436376, 573],\n",
       " ['主题', 5.472023485436376, 574],\n",
       " ['治理', 5.532648107252811, 575],\n",
       " ['加快', 5.472023485436376, 576],\n",
       " ['给', 5.472023485436376, 577],\n",
       " ['局', 5.532648107252811, 578],\n",
       " ['加拿大', 5.532648107252811, 579],\n",
       " ['人民币', 5.532648107252811, 580],\n",
       " ['巴', 5.597186628390382, 581],\n",
       " ['收官', 5.532648107252811, 582],\n",
       " ['机制', 5.532648107252811, 583],\n",
       " ['出台', 5.532648107252811, 584],\n",
       " ['粤', 5.666179499877334, 585],\n",
       " ['国企', 5.666179499877334, 586],\n",
       " ['海峡', 5.532648107252811, 587],\n",
       " ['共话', 5.532648107252811, 588],\n",
       " ['外交部', 5.532648107252811, 589],\n",
       " ['新高', 5.532648107252811, 590],\n",
       " ['银行', 5.532648107252811, 591],\n",
       " ['起来', 5.7402874720310555, 592],\n",
       " ['华裔', 5.532648107252811, 593],\n",
       " ['丰收', 5.532648107252811, 594],\n",
       " ['一个', 5.597186628390382, 595],\n",
       " ['升至', 5.532648107252811, 596],\n",
       " ['三届', 5.532648107252811, 597],\n",
       " ['以上', 5.532648107252811, 598],\n",
       " ['湖北', 5.597186628390382, 599],\n",
       " ['转移', 5.532648107252811, 600],\n",
       " ['加速', 5.532648107252811, 601],\n",
       " ['人士', 5.532648107252811, 602],\n",
       " ['公路', 5.597186628390382, 603],\n",
       " ['深化', 5.532648107252811, 604],\n",
       " ['标准', 5.532648107252811, 605],\n",
       " ['携手', 5.532648107252811, 606],\n",
       " ['智慧', 5.532648107252811, 607],\n",
       " ['救援', 5.666179499877334, 608],\n",
       " ['庆祝', 5.532648107252811, 609],\n",
       " ['新闻', 5.597186628390382, 610],\n",
       " ['亿', 5.532648107252811, 611],\n",
       " ['澳', 5.7402874720310555, 612],\n",
       " ['逼近', 5.532648107252811, 613],\n",
       " ['武术', 5.532648107252811, 614],\n",
       " ['人工', 5.597186628390382, 615],\n",
       " ['犯罪', 5.532648107252811, 616],\n",
       " ['使馆', 5.532648107252811, 617],\n",
       " ['部长', 5.666179499877334, 618],\n",
       " ['力', 5.532648107252811, 619],\n",
       " ['万亿元', 5.532648107252811, 620],\n",
       " ['公布', 5.532648107252811, 621],\n",
       " ['四届', 5.597186628390382, 622],\n",
       " ['媒体', 5.597186628390382, 623],\n",
       " ['加拿', 5.532648107252811, 624],\n",
       " ['大熊猫', 5.666179499877334, 625],\n",
       " ['医疗', 5.597186628390382, 626],\n",
       " ['前瞻', 5.597186628390382, 627],\n",
       " ['说', 5.597186628390382, 628],\n",
       " ['贸', 5.597186628390382, 629],\n",
       " ['深圳', 5.597186628390382, 630],\n",
       " ['开除', 5.597186628390382, 631],\n",
       " ['一带', 5.597186628390382, 632],\n",
       " ['重点', 5.597186628390382, 633],\n",
       " ['官员', 5.597186628390382, 634],\n",
       " ['监察', 5.597186628390382, 635],\n",
       " ['克斯', 5.597186628390382, 636],\n",
       " ['河南', 5.597186628390382, 637],\n",
       " ['优化', 5.597186628390382, 638],\n",
       " ['博览', 5.597186628390382, 639],\n",
       " ['委员', 5.597186628390382, 640],\n",
       " ['博览会', 5.597186628390382, 641],\n",
       " ['龙江', 5.597186628390382, 642],\n",
       " ['严重', 5.597186628390382, 643],\n",
       " ['万余', 5.597186628390382, 644],\n",
       " ['吉林', 5.597186628390382, 645],\n",
       " ['常委会', 5.7402874720310555, 646],\n",
       " ['高', 5.597186628390382, 647],\n",
       " ['生命', 5.597186628390382, 648],\n",
       " ['智', 5.597186628390382, 649],\n",
       " ['参加', 5.597186628390382, 650],\n",
       " ['民众', 5.597186628390382, 651],\n",
       " ['党组', 5.666179499877334, 652],\n",
       " ['外资', 5.597186628390382, 653],\n",
       " ['紧急', 5.597186628390382, 654],\n",
       " ['航天', 5.666179499877334, 655],\n",
       " ['发现', 5.597186628390382, 656],\n",
       " ['检察', 5.666179499877334, 657],\n",
       " ['总理', 5.597186628390382, 658],\n",
       " ['部队', 5.7402874720310555, 659],\n",
       " ['千年', 5.597186628390382, 660],\n",
       " ['交易', 5.597186628390382, 661],\n",
       " ['坍塌', 5.666179499877334, 662],\n",
       " ['机关', 5.597186628390382, 663],\n",
       " ['人次', 5.597186628390382, 664],\n",
       " ['创业', 5.666179499877334, 665],\n",
       " ['战略', 5.597186628390382, 666],\n",
       " ['领域', 5.597186628390382, 667],\n",
       " ['协同', 5.597186628390382, 668],\n",
       " ['品牌', 5.597186628390382, 669],\n",
       " ['措施', 5.597186628390382, 670],\n",
       " ['开除党籍', 5.597186628390382, 671],\n",
       " ['助推', 5.597186628390382, 672],\n",
       " ['熊猫', 5.666179499877334, 673],\n",
       " ['省', 5.597186628390382, 674],\n",
       " ['党籍', 5.597186628390382, 675],\n",
       " ['道', 5.7402874720310555, 676],\n",
       " ['黑龙江', 5.597186628390382, 677],\n",
       " ['较', 5.597186628390382, 678],\n",
       " ['展示', 5.597186628390382, 679],\n",
       " ['人大常委会', 5.7402874720310555, 680],\n",
       " ['和平', 5.597186628390382, 681],\n",
       " ['年轻', 5.597186628390382, 682],\n",
       " ['粤港', 5.597186628390382, 683],\n",
       " ['特', 5.597186628390382, 684],\n",
       " ['大熊', 5.666179499877334, 685],\n",
       " ['引', 5.597186628390382, 686],\n",
       " ['公安', 5.597186628390382, 687],\n",
       " ['客运', 5.666179499877334, 688],\n",
       " ['需', 5.666179499877334, 689],\n",
       " ['保持', 5.666179499877334, 690],\n",
       " ['出行', 5.666179499877334, 691],\n",
       " ['流域', 5.7402874720310555, 692],\n",
       " ['不断', 5.666179499877334, 693],\n",
       " ['节', 5.666179499877334, 694],\n",
       " ['低', 5.666179499877334, 695],\n",
       " ['问题', 5.666179499877334, 696],\n",
       " ['卫星', 5.666179499877334, 697],\n",
       " ['主任', 5.7402874720310555, 698],\n",
       " ['覆盖', 5.666179499877334, 699],\n",
       " ['胜', 5.666179499877334, 700],\n",
       " ['厦门', 5.666179499877334, 701],\n",
       " ['部署', 5.666179499877334, 702],\n",
       " ['部', 5.666179499877334, 703],\n",
       " ['构建', 5.666179499877334, 704],\n",
       " ['习', 5.666179499877334, 705],\n",
       " ['袭击', 5.666179499877334, 706],\n",
       " ['基层', 5.7402874720310555, 707],\n",
       " ['上市', 5.666179499877334, 708],\n",
       " ['成果', 5.666179499877334, 709],\n",
       " ['外', 5.666179499877334, 710],\n",
       " ['宁德', 5.666179499877334, 711],\n",
       " ['机场', 5.666179499877334, 712],\n",
       " ['致富', 5.666179499877334, 713],\n",
       " ['违法', 5.666179499877334, 714],\n",
       " ['强军', 5.820330179704592, 715],\n",
       " ['大暴雨', 5.7402874720310555, 716],\n",
       " ['整治', 5.666179499877334, 717],\n",
       " ['岁', 5.666179499877334, 718],\n",
       " ['沉浸', 5.666179499877334, 719],\n",
       " ['任', 5.666179499877334, 720],\n",
       " ['天', 5.666179499877334, 721],\n",
       " ['运输', 5.7402874720310555, 722],\n",
       " ['一起', 5.666179499877334, 723],\n",
       " ['农村部', 5.666179499877334, 724],\n",
       " ['韵', 5.666179499877334, 725],\n",
       " ['圆满', 5.666179499877334, 726],\n",
       " ['机遇', 5.666179499877334, 727],\n",
       " ['梦想', 5.7402874720310555, 728],\n",
       " ['克兰', 5.820330179704592, 729],\n",
       " ['高速', 5.666179499877334, 730],\n",
       " ['国建', 5.666179499877334, 731],\n",
       " ['海上', 5.666179499877334, 732],\n",
       " ['阶段', 5.666179499877334, 733],\n",
       " ['工', 5.666179499877334, 734],\n",
       " ['林', 5.820330179704592, 735],\n",
       " ['风险', 5.666179499877334, 736],\n",
       " ['工匠', 6.002651736498547, 737],\n",
       " ['贸易', 5.666179499877334, 738],\n",
       " ['商品', 5.666179499877334, 739],\n",
       " ['首批', 5.666179499877334, 740],\n",
       " ['希望', 5.666179499877334, 741],\n",
       " ['强调', 5.666179499877334, 742],\n",
       " ['种', 5.666179499877334, 743],\n",
       " ['园', 5.666179499877334, 744],\n",
       " ['哈萨克斯坦', 5.7402874720310555, 745],\n",
       " ['案', 5.7402874720310555, 746],\n",
       " ['潮', 5.820330179704592, 747],\n",
       " ['第号', 5.7402874720310555, 748],\n",
       " ['万元', 5.820330179704592, 749],\n",
       " ['粤港澳', 5.7402874720310555, 750],\n",
       " ['亿美元', 5.7402874720310555, 751],\n",
       " ['城', 5.7402874720310555, 752],\n",
       " ['这些', 5.7402874720310555, 753],\n",
       " ['高校', 5.7402874720310555, 754],\n",
       " ['愿者', 5.7402874720310555, 755],\n",
       " ['农', 5.7402874720310555, 756],\n",
       " ['萨克斯', 5.7402874720310555, 757],\n",
       " ['高峰', 5.7402874720310555, 758],\n",
       " ['事前', 5.7402874720310555, 759],\n",
       " ['人机', 5.7402874720310555, 760],\n",
       " ['印度', 5.7402874720310555, 761],\n",
       " ['周', 5.7402874720310555, 762],\n",
       " ['训练', 5.7402874720310555, 763],\n",
       " ['并', 5.7402874720310555, 764],\n",
       " ['舞蹈节', 5.7402874720310555, 765],\n",
       " ['国务', 5.7402874720310555, 766],\n",
       " ['数', 5.7402874720310555, 767],\n",
       " ['抗', 5.7402874720310555, 768],\n",
       " ['个人', 5.7402874720310555, 769],\n",
       " ['世界杯赛', 5.7402874720310555, 770],\n",
       " ['会首', 5.7402874720310555, 771],\n",
       " ['何以', 5.7402874720310555, 772],\n",
       " ['政协', 5.7402874720310555, 773],\n",
       " ['丝路', 5.7402874720310555, 774],\n",
       " ['二届', 5.7402874720310555, 775],\n",
       " ['学院', 5.820330179704592, 776],\n",
       " ['模式', 5.7402874720310555, 777],\n",
       " ['北上', 5.7402874720310555, 778],\n",
       " ['你', 5.7402874720310555, 779],\n",
       " ['杯赛', 5.7402874720310555, 780],\n",
       " ['落幕', 5.7402874720310555, 781],\n",
       " ['利用', 5.7402874720310555, 782],\n",
       " ['欢迎', 5.7402874720310555, 783],\n",
       " ['友谊', 5.7402874720310555, 784],\n",
       " ['团体', 5.7402874720310555, 785],\n",
       " ['中医', 5.907341556694222, 786],\n",
       " ['侨', 5.820330179704592, 787],\n",
       " ['拉萨', 5.820330179704592, 788],\n",
       " ['环境保护', 5.7402874720310555, 789],\n",
       " ['海市', 5.907341556694222, 790],\n",
       " ['哈萨克', 5.7402874720310555, 791],\n",
       " ['哈萨', 5.7402874720310555, 792],\n",
       " ['推介', 5.7402874720310555, 793],\n",
       " ['提', 5.7402874720310555, 794],\n",
       " ['电', 5.7402874720310555, 795],\n",
       " ['长', 5.820330179704592, 796],\n",
       " ['魅力', 5.7402874720310555, 797],\n",
       " ['地震', 5.7402874720310555, 798],\n",
       " ['班列', 5.7402874720310555, 799],\n",
       " ['会上', 5.7402874720310555, 800],\n",
       " ['总值', 5.7402874720310555, 801],\n",
       " ['为何', 5.7402874720310555, 802],\n",
       " ['去', 5.820330179704592, 803],\n",
       " ['信用', 5.820330179704592, 804],\n",
       " ['民间', 5.907341556694222, 805],\n",
       " ['东方', 5.7402874720310555, 806],\n",
       " ['干部', 5.7402874720310555, 807],\n",
       " ['破', 5.7402874720310555, 808],\n",
       " ['第三届', 5.7402874720310555, 809],\n",
       " ['吸引', 5.7402874720310555, 810],\n",
       " ['建军', 5.7402874720310555, 811],\n",
       " ['志愿者', 5.7402874720310555, 812],\n",
       " ['做', 5.820330179704592, 813],\n",
       " ['开营', 5.7402874720310555, 814],\n",
       " ['能量', 5.820330179704592, 815],\n",
       " ['欧洲', 5.7402874720310555, 816],\n",
       " ['内地', 5.7402874720310555, 817],\n",
       " ['绿', 5.820330179704592, 818],\n",
       " ['电力', 5.7402874720310555, 819],\n",
       " ['泉州', 5.7402874720310555, 820],\n",
       " ['力度', 5.7402874720310555, 821],\n",
       " ['大国', 6.002651736498547, 822],\n",
       " ['试验', 5.7402874720310555, 823],\n",
       " ['开始', 5.7402874720310555, 824],\n",
       " ['联', 5.7402874720310555, 825],\n",
       " ['内外', 5.7402874720310555, 826],\n",
       " ['民族舞', 5.7402874720310555, 827],\n",
       " ['多元', 5.7402874720310555, 828],\n",
       " ['何', 5.7402874720310555, 829],\n",
       " ['两国', 5.820330179704592, 830],\n",
       " ['绘', 5.7402874720310555, 831],\n",
       " ['造成', 5.7402874720310555, 832],\n",
       " ['萨克', 5.7402874720310555, 833],\n",
       " ['南京', 5.7402874720310555, 834],\n",
       " ['大陆', 5.820330179704592, 835],\n",
       " ['指数', 5.820330179704592, 836],\n",
       " ['更多', 5.7402874720310555, 837],\n",
       " ['联合国', 5.7402874720310555, 838],\n",
       " ['西安', 5.7402874720310555, 839],\n",
       " ['合发', 5.7402874720310555, 840],\n",
       " ['信息', 5.7402874720310555, 841],\n",
       " ['集中', 5.7402874720310555, 842],\n",
       " ['区域', 5.7402874720310555, 843],\n",
       " ['精品', 5.7402874720310555, 844],\n",
       " ['挑战', 5.7402874720310555, 845],\n",
       " ['依法', 5.7402874720310555, 846],\n",
       " ['中学', 5.7402874720310555, 847],\n",
       " ['话', 5.7402874720310555, 848],\n",
       " ['能力', 5.907341556694222, 849],\n",
       " ['信号', 5.820330179704592, 850],\n",
       " ['趋势', 5.820330179704592, 851],\n",
       " ['飞行', 5.820330179704592, 852],\n",
       " ['方式', 5.820330179704592, 853],\n",
       " ['大运村', 5.820330179704592, 854],\n",
       " ['中华文明', 5.907341556694222, 855],\n",
       " ['案件', 5.820330179704592, 856],\n",
       " ['又', 5.820330179704592, 857],\n",
       " ['作家', 5.820330179704592, 858],\n",
       " ['任务', 5.820330179704592, 859],\n",
       " ['人工智能', 5.907341556694222, 860],\n",
       " ['无人机', 5.820330179704592, 861],\n",
       " ['呈现', 5.820330179704592, 862],\n",
       " ['道路', 5.820330179704592, 863],\n",
       " ['长三角', 5.820330179704592, 864],\n",
       " ['印尼', 5.907341556694222, 865],\n",
       " ['长江', 5.820330179704592, 866],\n",
       " ['公职', 5.820330179704592, 867],\n",
       " ['直', 5.820330179704592, 868],\n",
       " ['抗美援朝', 5.820330179704592, 869],\n",
       " ['民生', 5.820330179704592, 870],\n",
       " ['长三', 5.820330179704592, 871],\n",
       " ['海峡两岸', 5.820330179704592, 872],\n",
       " ['第二届', 5.820330179704592, 873],\n",
       " ['夺冠', 5.820330179704592, 874],\n",
       " ['养老', 5.907341556694222, 875],\n",
       " ['停运', 5.820330179704592, 876],\n",
       " ['茶', 5.907341556694222, 877],\n",
       " ['如', 6.002651736498547, 878],\n",
       " ['一届', 5.820330179704592, 879],\n",
       " ['扩大', 5.820330179704592, 880],\n",
       " ['河流', 5.820330179704592, 881],\n",
       " ['图', 5.820330179704592, 882],\n",
       " ['风雨', 5.820330179704592, 883],\n",
       " ['纪念', 5.820330179704592, 884],\n",
       " ['市民', 5.820330179704592, 885],\n",
       " ['兵团', 5.820330179704592, 886],\n",
       " ['覃', 5.820330179704592, 887],\n",
       " ['位', 5.820330179704592, 888],\n",
       " ['会主', 5.820330179704592, 889],\n",
       " ['坠毁', 5.820330179704592, 890],\n",
       " ['西亚', 5.820330179704592, 891],\n",
       " ['遭', 5.820330179704592, 892],\n",
       " ['股', 5.820330179704592, 893],\n",
       " ['吗', 5.820330179704592, 894],\n",
       " ['品', 5.820330179704592, 895],\n",
       " ['访', 5.820330179704592, 896],\n",
       " ['受伤', 5.820330179704592, 897],\n",
       " ['击', 5.820330179704592, 898],\n",
       " ['川', 5.820330179704592, 899],\n",
       " ['水利', 5.820330179704592, 900],\n",
       " ['涉', 5.820330179704592, 901],\n",
       " ['第四', 5.820330179704592, 902],\n",
       " ['季', 5.820330179704592, 903],\n",
       " ['桥', 5.820330179704592, 904],\n",
       " ['积水', 5.820330179704592, 905],\n",
       " ['朝', 5.820330179704592, 906],\n",
       " ['生', 5.820330179704592, 907],\n",
       " ['南部', 5.820330179704592, 908],\n",
       " ['营', 5.907341556694222, 909],\n",
       " ['元', 5.820330179704592, 910],\n",
       " ['发射', 5.820330179704592, 911],\n",
       " ['三角', 5.820330179704592, 912],\n",
       " ['受灾', 5.820330179704592, 913],\n",
       " ['森林', 5.820330179704592, 914],\n",
       " ['连续', 5.820330179704592, 915],\n",
       " ['外长', 5.907341556694222, 916],\n",
       " ['时间', 5.820330179704592, 917],\n",
       " ['使用', 5.820330179704592, 918],\n",
       " ['刘', 5.820330179704592, 919],\n",
       " ['上线', 5.820330179704592, 920],\n",
       " ['风级', 5.820330179704592, 921],\n",
       " ['十四', 5.907341556694222, 922],\n",
       " ['电影', 5.907341556694222, 923],\n",
       " ['上海市', 6.002651736498547, 924],\n",
       " ['局势', 5.820330179704592, 925],\n",
       " ['签约', 5.820330179704592, 926],\n",
       " ['同', 5.820330179704592, 927],\n",
       " ['粮食', 5.820330179704592, 928],\n",
       " ['云', 5.907341556694222, 929],\n",
       " ['八一', 5.907341556694222, 930],\n",
       " ['稳', 5.820330179704592, 931],\n",
       " ['美援', 5.820330179704592, 932],\n",
       " ['时政', 5.820330179704592, 933],\n",
       " ['点燃', 5.820330179704592, 934],\n",
       " ['漳州', 5.820330179704592, 935],\n",
       " ['山洪', 5.907341556694222, 936],\n",
       " ['摄影', 5.907341556694222, 937],\n",
       " ['救助', 5.820330179704592, 938],\n",
       " ['培训', 5.907341556694222, 939],\n",
       " ['长期', 5.907341556694222, 940],\n",
       " ['互联', 5.907341556694222, 941],\n",
       " ['以来', 5.907341556694222, 942],\n",
       " ['委', 5.907341556694222, 943],\n",
       " ['书', 6.002651736498547, 944],\n",
       " ['南沙', 6.002651736498547, 945],\n",
       " ['外贸', 5.907341556694222, 946],\n",
       " ['十六', 5.907341556694222, 947],\n",
       " ['停航', 5.907341556694222, 948],\n",
       " ['之乡', 5.907341556694222, 949],\n",
       " ['万户', 5.907341556694222, 950],\n",
       " ['武警', 5.907341556694222, 951],\n",
       " ['责任', 5.907341556694222, 952],\n",
       " ['增加', 5.907341556694222, 953],\n",
       " ['张家', 5.907341556694222, 954],\n",
       " ['携', 5.907341556694222, 955],\n",
       " ['尼', 6.002651736498547, 956],\n",
       " ['装备', 5.907341556694222, 957],\n",
       " ['并举', 5.907341556694222, 958],\n",
       " ['总体', 5.907341556694222, 959],\n",
       " ['医药', 6.002651736498547, 960],\n",
       " ['风', 5.907341556694222, 961],\n",
       " ['机器', 6.002651736498547, 962],\n",
       " ['遗产', 5.907341556694222, 963],\n",
       " ['中共', 6.108012252156373, 964],\n",
       " ['谈', 5.907341556694222, 965],\n",
       " ['巴蜀', 5.907341556694222, 966],\n",
       " ['积极', 5.907341556694222, 967],\n",
       " ['事务', 5.907341556694222, 968],\n",
       " ['电量', 6.002651736498547, 969],\n",
       " ['港澳台', 5.907341556694222, 970],\n",
       " ['观', 5.907341556694222, 971],\n",
       " ['社区', 6.108012252156373, 972],\n",
       " ['罗斯', 5.907341556694222, 973],\n",
       " ['大幅', 5.907341556694222, 974],\n",
       " ['传递', 6.002651736498547, 975],\n",
       " ['形成', 5.907341556694222, 976],\n",
       " ['敦煌', 6.002651736498547, 977],\n",
       " ['共生', 5.907341556694222, 978],\n",
       " ['现', 5.907341556694222, 979],\n",
       " ['十四届', 6.002651736498547, 980],\n",
       " ['因', 5.907341556694222, 981],\n",
       " ['田', 6.002651736498547, 982],\n",
       " ['夺', 5.907341556694222, 983],\n",
       " ['入境', 5.907341556694222, 984],\n",
       " ['领导', 6.002651736498547, 985],\n",
       " ['爆炸', 5.907341556694222, 986],\n",
       " ['哈尔', 5.907341556694222, 987],\n",
       " ['蛙泳', 5.907341556694222, 988],\n",
       " ['记', 5.907341556694222, 989],\n",
       " ['尼亚', 5.907341556694222, 990],\n",
       " ['中欧', 6.002651736498547, 991],\n",
       " ['清凉', 5.907341556694222, 992],\n",
       " ['光伏', 6.108012252156373, 993],\n",
       " ['俄罗斯', 5.907341556694222, 994],\n",
       " ['毅', 5.907341556694222, 995],\n",
       " ['发行', 5.907341556694222, 996],\n",
       " ['示范', 5.907341556694222, 997],\n",
       " ['进行', 5.907341556694222, 998],\n",
       " ['国外', 5.907341556694222, 999],\n",
       " ...]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "keywords_list_with_idf = list(zip(cv_model.vocabulary, idf_model.idf.toArray()))\n",
    "\n",
    "def append_index(data):\n",
    "    for index in range(len(data)):\n",
    "        data[index] = list(data[index])  # 将元组转为list\n",
    "        data[index].append(index)  # 加入索引\n",
    "        data[index][1] = float(data[index][1])\n",
    " \n",
    "append_index(keywords_list_with_idf)\n",
    "keywords_list_with_idf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------+------------------+-----+\n",
      "|keywords|               idf|index|\n",
      "+--------+------------------+-----+\n",
      "|    中国|1.9387663817611542|    0|\n",
      "|      的|2.2793708556672776|    1|\n",
      "|      在| 2.581651727540211|    2|\n",
      "|    发展|2.7797838903608083|    3|\n",
      "|    大运|3.0119320047680995|    4|\n",
      "|    成都|3.0425466405877066|    5|\n",
      "|      将|3.1347528344544404|    6|\n",
      "|    台风| 3.223832464508129|    7|\n",
      "|  大运会|3.2618117125733455|    8|\n",
      "|      人|3.4148877012708385|    9|\n",
      "|    北京| 3.356476939114424|   10|\n",
      "|    国家| 3.342392199232685|   11|\n",
      "|      新|3.4300395062914406|   12|\n",
      "|      年| 3.377983144335387|   13|\n",
      "|      和| 3.385255903664467|   14|\n",
      "|      苏| 3.377983144335387|   15|\n",
      "|    半年| 3.377983144335387|   16|\n",
      "|    文化|3.3999620510541626|   17|\n",
      "|      杜|  3.39258194375654|   18|\n",
      "|    世界|  3.44542442513092|   19|\n",
      "+--------+------------------+-----+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "sc = spark.sparkContext\n",
    "rdd = sc.parallelize(keywords_list_with_idf)  # 创建rdd\n",
    "idf_keywords = rdd.toDF([\"keywords\", \"idf\", \"index\"])\n",
    "idf_keywords.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                \r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+--------+-------+\n",
      "|                 id|keywords|weights|\n",
      "+-------------------+--------+-------+\n",
      "|4785099261031219201|    央行| 6.3593|\n",
      "|4785099261123493889|    央行| 6.3593|\n",
      "|4785099262264344577|    央行| 6.3593|\n",
      "|4785099261815554049|    工具| 6.6958|\n",
      "|4785099262012686337|      忙| 6.6958|\n",
      "|4785099261651976193|    山中| 7.6121|\n",
      "|4785099261509369857|      谋| 7.6121|\n",
      "|4785099262608277505|    众多| 7.6121|\n",
      "|4785099262310481921|    京门| 6.6958|\n",
      "|4785099262687969281|    免费| 6.9189|\n",
      "|4785099261941383169|    发送| 6.9189|\n",
      "|4785099262633443329|    谋划| 7.6121|\n",
      "|4785099262461476865|    航空| 5.0471|\n",
      "|4785099262671192065|    通胀| 7.2066|\n",
      "|4785099261844914177|    世界| 3.4454|\n",
      "|4785099261157048321|    女性| 6.5135|\n",
      "|4785099262184652801|  西班牙| 6.6958|\n",
      "|4785099261341597697|    弹性| 7.2066|\n",
      "|4785099261563895809|    弹性| 7.2066|\n",
      "|4785099261488398337|    大政| 7.2066|\n",
      "+-------------------+--------+-------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "\n",
    "keywords_result_s = keywords_by_tfidf_s.join(idf_keywords, idf_keywords.index == keywords_by_tfidf_s.index).select(\n",
    "    [\"id\", \"keywords\", \"weights\"])\n",
    "keywords_result_s.rdd.toDF([\"id\", \"keywords\", \"weights\"]).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                \r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+--------+-------+\n",
      "|                 id|keywords|weights|\n",
      "+-------------------+--------+-------+\n",
      "|4784374258232983553|      为| 3.5957|\n",
      "|4784714422553673729|      为| 3.5957|\n",
      "|4784374253497614337|      梦| 5.3608|\n",
      "|4784374254109982721|    央行| 6.3593|\n",
      "|4784374253304676353|  门头沟| 6.3593|\n",
      "|4784374253367590913|  门头沟| 6.3593|\n",
      "|4784374253782827009|  门头沟| 6.3593|\n",
      "|4784374254705573889|  门头沟| 6.3593|\n",
      "|4784374255057895425|  门头沟| 6.3593|\n",
      "|4784374255171141633|  门头沟| 6.3593|\n",
      "|4784374255452160001|  门头沟| 6.3593|\n",
      "|4784374255947087873|  门头沟| 6.3593|\n",
      "|4784374256576233473|  门头沟| 6.3593|\n",
      "|4784374257704501249|  门头沟| 6.3593|\n",
      "|4784374257914216449|  门头沟| 6.3593|\n",
      "|4784374258232983553|  门头沟| 6.3593|\n",
      "|4784714421505097729|  门头沟| 6.3593|\n",
      "|4784714421773533185|  门头沟| 6.3593|\n",
      "|4784714422515924993|  门头沟| 6.3593|\n",
      "|4784714419160481793|    国有| 6.5135|\n",
      "+-------------------+--------+-------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "\n",
    "keywords_result_e = keywords_by_tfidf_e.join(idf_keywords, idf_keywords.index == keywords_by_tfidf_e.index).select(\n",
    "    [\"id\", \"keywords\", \"weights\"])\n",
    "keywords_result_e.rdd.toDF([\"id\", \"keywords\", \"weights\"]).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2023-08-04 08:46:55,862 WARN scheduler.TaskSetManager: Stage 39 contains a task of very large size (1277 KiB). The maximum recommended task size is 1000 KiB.\n",
      "                                                                                \r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+--------+-------+----+--------------------+\n",
      "|                 id|keywords|weights|word|              vector|\n",
      "+-------------------+--------+-------+----+--------------------+\n",
      "|4785099261031219201|    央行| 6.3593|央行|[-0.0062539251521...|\n",
      "|4785099261123493889|    央行| 6.3593|央行|[-0.0062539251521...|\n",
      "|4785099262264344577|    央行| 6.3593|央行|[-0.0062539251521...|\n",
      "|4785099261815554049|    工具| 6.6958|工具|[0.02509293332695...|\n",
      "|4785099262012686337|      忙| 6.6958|  忙|[-0.0029719017911...|\n",
      "|4785099262310481921|    京门| 6.6958|京门|[0.01083097979426...|\n",
      "|4785099262687969281|    免费| 6.9189|免费|[0.01649738848209...|\n",
      "|4785099261941383169|    发送| 6.9189|发送|[-0.0151850245893...|\n",
      "|4785099262461476865|    航空| 5.0471|航空|[0.06224365532398...|\n",
      "|4785099261844914177|    世界| 3.4454|世界|[0.05207106098532...|\n",
      "+-------------------+--------+-------+----+--------------------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "keywords_vector_s = keywords_result_s.join(vectors, vectors.word == keywords_result_s.keywords, 'inner')\n",
    "keywords_vector_s.rdd.toDF([\"id\", \"keywords\", \"weights\"]).show(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2023-08-04 08:47:00,626 WARN scheduler.TaskSetManager: Stage 52 contains a task of very large size (1277 KiB). The maximum recommended task size is 1000 KiB.\n",
      "                                                                                \r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+--------+-------+------+--------------------+\n",
      "|                 id|keywords|weights|  word|              vector|\n",
      "+-------------------+--------+-------+------+--------------------+\n",
      "|4784374258232983553|      为| 3.5957|    为|[0.00382332876324...|\n",
      "|4784714422553673729|      为| 3.5957|    为|[0.00382332876324...|\n",
      "|4784374253497614337|      梦| 5.3608|    梦|[0.02015652135014...|\n",
      "|4784374254109982721|    央行| 6.3593|  央行|[-0.0062539251521...|\n",
      "|4784374253304676353|  门头沟| 6.3593|门头沟|[-5.3986581042408...|\n",
      "|4784374253367590913|  门头沟| 6.3593|门头沟|[-5.3986581042408...|\n",
      "|4784374253782827009|  门头沟| 6.3593|门头沟|[-5.3986581042408...|\n",
      "|4784374254705573889|  门头沟| 6.3593|门头沟|[-5.3986581042408...|\n",
      "|4784374255057895425|  门头沟| 6.3593|门头沟|[-5.3986581042408...|\n",
      "|4784374255171141633|  门头沟| 6.3593|门头沟|[-5.3986581042408...|\n",
      "+-------------------+--------+-------+------+--------------------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "keywords_vector_e = keywords_result_e.join(vectors, vectors.word == keywords_result_e.keywords, 'inner')\n",
    "keywords_vector_e.rdd.toDF([\"id\", \"keywords\", \"weights\"]).show(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_vector(row):\n",
    "    return row.id, row.keywords, row.weights * row.vector\n",
    " \n",
    "article_keyword_vectors_e = keywords_vector_e.rdd.map(compute_vector).toDF([\"id\", \"keywords\", \"weightingVector\"])\n",
    " \n",
    "# 利用 collect_set() 方法，将一篇文章内所有关键词的词向量合并为一个列表\n",
    "article_keyword_vectors_e.registerTempTable('temptable')\n",
    "article_keyword_vectors_e = spark.sql(\"select id, collect_set(weightingVector) vectors from temptable group by id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "article_keyword_vectors_s = keywords_vector_s.rdd.map(compute_vector).toDF([\"id\", \"keywords\", \"weightingVector\"])\n",
    " \n",
    "# 利用 collect_set() 方法，将一篇文章内所有关键词的词向量合并为一个列表\n",
    "article_keyword_vectors_s.registerTempTable('temptable')\n",
    "article_keyword_vectors_s = spark.sql(\"select id, collect_set(weightingVector) vectors from temptable group by id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                \r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "文章最终vector [Row(id='4785099261148659713', articlevector=DenseVector([-0.0091, -0.1598, -0.046, 0.0075, -0.0111, -0.0245, -0.0101, 0.0706, -0.0972, 0.0109, 0.048, -0.0446, -0.0063, 0.0313, -0.0017, -0.0612, 0.07, -0.0047, -0.058, 0.0063, 0.1373, 0.0512, -0.0169, 0.0088, -0.0089, 0.0205, -0.0352, 0.02, -0.1208, 0.0904, 0.1857, 0.1329, 0.0232, 0.0339, 0.061, 0.0177, 0.0265, 0.0376, 0.0799, 0.0698, 0.071, 0.0734, 0.0513, 0.0752, 0.0965, 0.0006, 0.0141, 0.11, -0.0034, -0.095, -0.039, -0.1415, -0.0824, -0.0418, -0.0281, -0.0588, 0.036, -0.08, -0.0032, -0.0417, 0.016, 0.1078, -0.1315, -0.0016])), Row(id='4785099261781999617', articlevector=DenseVector([0.0924, 0.0434, 0.0496, 0.0433, 0.0035, 0.0072, 0.0266, 0.0197, -0.0521, -0.0529, -0.0271, 0.0059, 0.065, 0.0736, -0.0287, -0.0749, 0.0692, 0.0378, -0.0864, 0.0069, 0.0318, 0.0316, 0.0225, -0.0781, -0.0259, 0.0262, 0.0486, -0.0927, -0.005, 0.0084, -0.0471, 0.031, 0.0382, -0.0044, 0.0023, -0.0353, 0.0642, -0.0251, -0.057, -0.0045, -0.0618, -0.0133, -0.0251, -0.0228, 0.0109, 0.0019, 0.0053, -0.0527, -0.0275, 0.0338, 0.0641, -0.0898, -0.0039, -0.0324, 0.0006, 0.0954, -0.0379, 0.006, 0.0235, 0.1013, -0.0727, -0.0557, -0.0126, 0.0537])), Row(id='4785099262193041409', articlevector=DenseVector([0.0791, -0.144, -0.1056, 0.0497, -0.0021, 0.0053, 0.1269, -0.0083, -0.2488, 0.0164, -0.014, -0.0592, 0.1053, 0.2054, 0.1314, -0.2244, 0.2475, 0.0508, -0.2202, 0.1025, 0.1305, 0.0391, 0.0278, -0.1085, -0.073, 0.0121, 0.0322, -0.0911, -0.0141, 0.1441, 0.1821, 0.0712, -0.0342, 0.0926, 0.0896, 0.0402, 0.0499, 0.0039, 0.0698, 0.0973, 0.1471, -0.006, 0.0364, -0.0251, 0.0215, -0.0337, -0.0181, 0.0699, 0.0803, 0.0094, 0.0398, -0.1826, -0.1953, 0.005, 0.0088, 0.0423, 0.0304, -0.0937, 0.0231, 0.1828, -0.1101, -0.0515, -0.1047, 0.1597])), Row(id='4785099262339842049', articlevector=DenseVector([-0.032, 0.0109, -0.0696, 0.145, -0.0079, 0.0073, 0.0566, -0.0183, -0.0741, -0.0435, -0.0141, -0.051, 0.0656, 0.0841, -0.0119, -0.0938, 0.0776, 0.0357, 
-0.1165, 0.0653, 0.0246, 0.0583, -0.013, -0.0214, -0.0632, -0.0334, 0.0067, -0.0272, 0.0115, -0.0164, -0.0421, 0.0491, -0.0383, -0.0364, 0.0305, -0.0182, 0.0313, 0.002, -0.0569, -0.0244, 0.0617, 0.0108, -0.0445, 0.005, -0.0147, -0.0118, 0.0344, -0.0414, 0.0139, 0.0114, 0.0071, -0.0217, -0.1172, 0.0127, -0.046, 0.0465, 0.0361, -0.0226, -0.0103, 0.0897, -0.0604, -0.0316, -0.0308, 0.0695])), Row(id='4785099261463232513', articlevector=DenseVector([-0.0233, -0.0652, -0.0958, 0.4245, -0.1513, -0.0634, 0.1473, 0.0302, -0.1167, -0.1014, 0.0017, -0.1544, 0.3092, 0.2237, -0.1496, -0.3641, 0.3618, 0.1035, -0.4159, 0.1275, 0.1575, 0.1755, -0.0183, -0.1505, -0.1934, 0.0068, 0.1004, -0.1894, -0.0747, -0.0039, -0.0932, 0.3022, -0.0188, -0.1227, 0.0087, 0.0368, 0.152, 0.0538, -0.105, -0.0656, 0.1177, -0.0181, -0.0998, 0.0814, 0.0958, 0.0412, 0.1268, -0.0464, -0.1651, 0.0375, 0.0161, -0.1071, -0.3265, -0.0415, -0.1872, 0.1609, -0.0127, -0.034, -0.0669, 0.2918, -0.1756, -0.0176, -0.0456, 0.0269])), Row(id='4785099261798776833', articlevector=DenseVector([-0.0168, -0.0154, -0.0895, 0.1186, 0.002, -0.0197, 0.0204, -0.0034, -0.1055, -0.0491, 0.0115, -0.0572, 0.0467, 0.0622, -0.0155, -0.0985, 0.0646, 0.014, -0.119, 0.0497, 0.0388, 0.064, -0.0361, -0.0027, -0.0531, -0.0147, -0.0157, 0.0012, 0.0024, 0.0109, 0.0136, 0.0578, -0.0077, -0.0281, 0.0306, 0.0066, 0.0215, -0.0033, -0.0396, -0.0226, 0.0768, 0.029, -0.0208, 0.0214, 0.0053, 0.0066, 0.0282, -0.0038, -0.0084, 0.0071, -0.0014, -0.044, -0.11, 0.01, -0.0394, 0.0115, 0.0228, -0.0265, -0.0049, 0.06, -0.0627, -0.0002, -0.0547, 0.0472])), Row(id='4785099261861691393', articlevector=DenseVector([-0.5457, -1.2894, -1.2904, -0.0738, 0.4942, 0.1006, 0.055, -0.7049, -2.1176, 0.1276, -0.1655, -0.6733, -0.2435, 0.5312, 1.9022, -0.6237, 1.2327, -0.1801, -0.3778, 0.7648, 0.1975, -0.4759, 0.0005, 0.0397, 0.2174, -0.0537, -0.1549, 0.6837, 0.6213, 1.2473, 1.4108, -0.5172, -0.845, 0.7875, 0.7568, 0.6978, -0.4834, -0.4502, 0.8889, 0.7501, 1.7477, 
-0.1903, 0.9138, -0.501, -0.7321, -0.3532, -0.8254, 0.2429, 1.2823, -0.3789, -0.1741, -0.4706, -1.3404, 0.5073, 0.0975, -0.5394, 0.4239, -0.969, 0.3988, 0.6828, -0.6633, -0.1147, -0.962, 1.3235])), Row(id='4785099262058823681', articlevector=DenseVector([0.0645, -0.0034, 0.0179, -0.0222, -0.0399, 0.0248, 0.039, -0.018, -0.0747, 0.0373, -0.0071, 0.0036, 0.0714, 0.1049, 0.0095, -0.0756, 0.1042, 0.0308, -0.1201, 0.0342, 0.0986, 0.0642, 0.0202, -0.0591, -0.0324, 0.0327, 0.0348, -0.0597, -0.0672, 0.0566, 0.0586, 0.0689, 0.047, 0.0153, 0.0312, 0.0037, 0.0643, -0.003, -0.006, 0.0284, 0.0087, -0.0213, 0.0051, 0.0178, 0.0317, 0.0219, 0.0091, 0.0504, 0.0151, 0.0129, 0.0852, -0.0896, -0.0464, -0.0386, 0.0266, 0.0535, -0.0348, -0.0156, 0.0072, 0.0691, -0.0434, -0.0467, -0.0618, 0.046])), Row(id='4785099261605838849', articlevector=DenseVector([0.0101, -0.0502, -0.0981, 0.1127, -0.0592, -0.0205, 0.0717, 0.0291, -0.2155, -0.0205, -0.0232, -0.046, 0.0637, 0.107, 0.1179, -0.1393, 0.1432, 0.0362, -0.1824, 0.0685, 0.0547, 0.0929, 0.0196, -0.1265, -0.0006, -0.0537, 0.0232, 0.014, -0.0337, 0.0705, 0.1011, 0.1314, -0.0852, 0.0076, 0.0821, 0.0466, 0.0144, -0.0404, 0.0047, 0.04, 0.1073, 0.0068, 0.0074, -0.0364, -0.0367, 0.0338, -0.0314, 0.002, 0.0541, 0.0437, 0.026, -0.1393, -0.1321, 0.0391, -0.0159, 0.0563, 0.0491, -0.1097, -0.0003, 0.0983, -0.124, 0.0116, -0.0461, 0.0766])), Row(id='4785099262650220545', articlevector=DenseVector([0.0302, -0.1285, -0.1479, 0.096, -0.0687, 0.033, 0.0656, 0.0023, -0.2032, 0.045, -0.0228, -0.051, 0.1059, 0.1583, 0.0729, -0.162, 0.2289, 0.0491, -0.24, 0.0811, 0.174, 0.015, 0.0068, -0.0802, -0.0721, 0.0473, 0.0245, -0.0727, -0.0009, 0.1157, 0.1806, 0.0887, -0.0221, 0.0546, 0.059, 0.0454, 0.0299, 0.003, 0.0359, 0.0742, 0.1437, -0.0138, 0.0541, 0.0271, 0.0543, -0.0166, -0.0274, 0.0796, 0.0417, -0.0334, 0.0176, -0.1336, -0.1503, -0.0138, -0.0128, 0.0253, 0.0024, -0.1069, 0.0022, 0.1633, -0.0613, -0.0082, -0.0711, 0.1105]))]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Stage 96:=========================================>              (14 + 2) / 19]\r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+--------------------+\n",
      "|                 id|       articlevector|\n",
      "+-------------------+--------------------+\n",
      "|4785099261148659713|[-0.0091355079288...|\n",
      "|4785099261781999617|[0.09235456275278...|\n",
      "|4785099262193041409|[0.07914631862803...|\n",
      "|4785099262339842049|[-0.0320453816271...|\n",
      "|4785099261463232513|[-0.0232540343620...|\n",
      "|4785099261798776833|[-0.0167998276466...|\n",
      "|4785099261861691393|[-0.5457354395017...|\n",
      "|4785099262058823681|[0.06446473496534...|\n",
      "|4785099261605838849|[0.01008328102584...|\n",
      "|4785099262650220545|[0.03023930069778...|\n",
      "|4785099261198991361|[-0.0734794786457...|\n",
      "|4785099262272733185|[-0.0826209157594...|\n",
      "|4785099261312237569|[-0.0314119941360...|\n",
      "|4785099261698113537|[0.09288054583140...|\n",
      "|4785099261375152129|[-0.0158094335465...|\n",
      "|4785099261291266049|[-0.0167998276466...|\n",
      "|4785099261928800257|[0.02576889668016...|\n",
      "|4785099262687969281|[0.05944621129818...|\n",
      "|4785099261404512257|[0.09840873841564...|\n",
      "|4785099262079795201|[0.03895516407571...|\n",
      "+-------------------+--------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                \r"
     ]
    }
   ],
   "source": [
    "def compute_avg_vectors(row):\n",
    "    x = 0\n",
    "    for i in row.vectors:\n",
    "        x += i\n",
    "    # 求平均值\n",
    "    return row.id, x / len(row.vectors)\n",
    " \n",
    "article_vector_s = article_keyword_vectors_s.rdd.map(compute_avg_vectors).toDF(['id', 'articlevector'])\n",
    "print(\"文章最终vector\",article_vector_s.take(10))\n",
    "article_vector_s.rdd.toDF(['id', 'articlevector']).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                \r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "文章最终vector [Row(id='4784374257956159489', articlevector=DenseVector([-0.2495, -0.3245, -0.2571, 0.9271, -0.1413, -0.4399, -0.1345, 0.5919, -0.5449, -0.3487, 0.3694, -0.3413, 0.1615, -0.0218, -0.6911, -0.4923, 0.3532, 0.0652, -0.6641, -0.3655, 0.5776, 0.7565, -0.2929, 0.0819, -0.3688, -0.189, -0.0753, 0.3251, -0.182, 0.0304, 0.6785, 1.0668, 0.4136, -0.0594, 0.1045, 0.4347, -0.0504, 0.259, -0.3802, 0.3411, -0.0485, 0.7665, -0.3077, 0.4776, 0.9927, 0.4312, 0.5491, 0.4591, -0.4816, -0.1873, -0.424, -0.2551, -0.526, -0.1479, -0.3109, -0.1367, 0.2259, -0.0705, -0.4614, -0.1992, -0.051, 0.7245, -0.6448, -0.522])), Row(id='4784714419076595713', articlevector=DenseVector([-0.3656, -0.6694, -0.2031, -0.1164, 0.0154, -0.4783, -0.5864, 0.8499, -0.4138, -0.0599, 0.6007, 0.0029, -0.5413, -0.8016, -0.4617, 0.393, -0.4996, -0.4029, 0.1584, -0.8071, 0.3661, 0.6632, -0.3227, 0.4926, 0.1402, -0.2529, -0.508, 0.9996, -0.5286, 0.076, 0.9919, 0.5763, 0.5889, 0.1452, 0.4121, 0.3919, -0.2046, 0.2041, 0.2182, 0.3484, 0.0028, 0.9016, 0.1184, 0.546, 1.0047, 0.5737, 0.2743, 0.9748, -0.2489, -0.5208, -0.5047, -0.2957, 0.1308, -0.1605, -0.0392, -0.6465, 0.1048, -0.1738, -0.3937, -1.146, 0.3759, 1.0984, -0.7352, -0.7834])), Row(id='4784374257670946817', articlevector=DenseVector([0.0902, -0.2058, -0.0212, -0.0821, 0.0351, -0.11, -0.0247, 0.244, -0.3008, 0.0045, 0.1817, 0.0802, -0.0502, -0.127, 0.0049, -0.0065, 0.0326, -0.0344, -0.0594, -0.2175, 0.2032, 0.2416, -0.0627, 0.0422, 0.0328, -0.0236, -0.0302, 0.1703, -0.1781, 0.1046, 0.4791, 0.2317, 0.1563, 0.1507, 0.1849, 0.1116, -0.0624, 0.0309, 0.0729, 0.1703, -0.0043, 0.2966, -0.0026, 0.1143, 0.3414, 0.1431, 0.0103, 0.2659, -0.0338, -0.14, -0.033, -0.273, 0.009, -0.0657, 0.0464, -0.1392, 0.0006, -0.0748, -0.0475, -0.1311, 0.0369, 0.2426, -0.2794, -0.1144])), Row(id='4784374253984153601', articlevector=DenseVector([0.0554, 0.0761, 0.0427, 0.0182, 0.0056, 0.021, 0.0416, -0.0371, -0.0378, -0.0635, -0.004, 0.0355, 0.0387, 0.0624, -0.0239, 
-0.0454, 0.0763, 0.0391, -0.0214, 0.0255, 0.0153, 0.0066, 0.0017, -0.0389, -0.0282, -0.0025, 0.0603, -0.0679, 0.0041, -0.0246, -0.0048, 0.0115, 0.0167, -0.0555, -0.0099, -0.0283, -0.0021, 0.0095, -0.0468, -0.0279, -0.0355, -0.0123, -0.0066, -0.0409, -0.0161, 0.0023, -0.0106, -0.0516, 0.0241, 0.0451, 0.045, -0.045, -0.0052, -0.0051, 0.0035, 0.0948, -0.0248, 0.0096, 0.0123, 0.0492, -0.0433, -0.0466, 0.0205, 0.0381])), Row(id='4784714422524313601', articlevector=DenseVector([0.0544, -0.0623, -0.0838, 0.0307, -0.0463, 0.0067, 0.0506, -0.0145, 0.0158, 0.0351, -0.0304, -0.0628, 0.1081, 0.035, 0.0047, -0.0771, 0.0587, -0.0052, -0.1162, 0.0716, 0.0879, -0.0041, 0.0078, -0.0205, -0.0391, 0.0645, -0.0151, -0.0931, -0.0645, 0.0842, 0.0212, 0.0219, 0.0107, 0.0594, -0.0042, 0.0079, 0.0523, 0.0926, -0.0208, 0.0467, 0.0637, -0.0006, 0.0318, 0.0917, 0.0506, -0.0584, 0.048, 0.0542, -0.0113, -0.0095, -0.0047, -0.002, -0.0608, -0.0792, -0.0082, 0.0078, -0.0062, 0.013, -0.0053, 0.053, 0.0152, -0.0282, -0.0149, 0.0235])), Row(id='4784714419198230529', articlevector=DenseVector([0.1279, 0.8032, 0.0855, 0.9392, -0.3522, -0.0339, 0.4322, 0.0462, 0.3601, -0.3252, -0.158, -0.0451, 0.7127, 0.3365, -0.3671, -0.5747, 0.3841, 0.3526, -0.6485, 0.3341, -0.2197, 0.0847, 0.1221, -0.6109, -0.2746, -0.3811, 0.3693, -0.5529, 0.0748, -0.347, -0.8058, 0.3893, -0.197, -0.7347, -0.2035, -0.2066, 0.2684, 0.0562, -0.6308, -0.4352, -0.347, -0.0454, -0.4632, -0.3342, -0.4667, -0.033, 0.2701, -0.6381, -0.4429, 0.6292, 0.3544, 0.1332, -0.2265, 0.0842, -0.4262, 0.8487, 0.0871, 0.1946, -0.1317, 0.7604, -0.3992, -0.2325, 0.5022, 0.0214])), Row(id='4784374250708402177', articlevector=DenseVector([0.1121, 0.0057, 0.0976, -0.0871, -0.0374, -0.0385, -0.0006, 0.0979, -0.0926, -0.0218, 0.0275, 0.0401, 0.0614, 0.0591, -0.0125, 0.0208, 0.0149, 0.0539, -0.058, -0.0287, 0.1142, 0.0509, -0.028, -0.0074, 0.02, 0.0583, 0.0343, -0.0499, -0.077, 0.0505, 0.1112, 0.0606, 0.0718, -0.0016, 0.0014, 0.0329, 0.0557, -0.035, -0.0486, 
0.0151, -0.0747, 0.0277, -0.0505, 0.0393, 0.1445, 0.0975, -0.0486, 0.0252, -0.0152, 0.0053, 0.0374, -0.105, 0.0695, -0.0871, 0.1004, 0.0764, -0.0481, 0.0189, -0.035, 0.0632, 0.0509, 0.0694, -0.0337, -0.0131])), Row(id='4784714419248562177', articlevector=DenseVector([-0.2452, 0.0203, -0.0691, 0.209, -0.1097, 0.106, -0.0509, -0.1964, 0.4486, -0.0854, -0.0993, -0.248, 0.0653, -0.1473, -0.2828, 0.0586, 0.0053, -0.0856, -0.0928, 0.2241, -0.1276, -0.1781, 0.0111, 0.0704, -0.0908, 0.066, -0.0064, -0.0445, 0.0567, -0.1958, -0.289, 0.0377, -0.1046, -0.1312, -0.1351, -0.1169, 0.0868, 0.0685, -0.1509, -0.2292, 0.1111, -0.2144, -0.1006, 0.2111, -0.1242, -0.0972, 0.1236, -0.089, -0.2954, 0.0813, 0.0313, 0.2868, -0.0088, -0.1457, -0.1574, -0.0716, 0.0515, 0.1749, -0.1367, -0.0253, 0.1885, -0.0616, 0.0852, -0.117])), Row(id='4784374254651047937', articlevector=DenseVector([0.0186, -0.0052, 0.0291, 0.1023, -0.0613, -0.0147, 0.0799, 0.0576, -0.0336, -0.0344, 0.0503, -0.037, 0.0869, 0.0325, -0.0004, -0.0941, 0.0655, -0.0264, -0.1072, -0.005, -0.0144, 0.0364, 0.0194, 0.008, -0.034, 0.0167, 0.0088, -0.0007, 0.0148, 0.0397, 0.0527, 0.0058, -0.0448, -0.0426, 0.0455, 0.0343, -0.0243, -0.0329, -0.0559, -0.0483, 0.0064, 0.038, -0.0022, 0.0468, -0.0177, 0.046, -0.0158, 0.0325, -0.0179, -0.0033, -0.037, -0.0225, -0.0725, -0.0565, -0.0319, 0.0337, 0.0186, -0.0009, -0.0267, 0.0217, 0.0102, 0.0078, -0.0492, -0.0185])), Row(id='4784374255355691009', articlevector=DenseVector([0.1073, 0.1664, 0.0726, 0.1087, -0.0042, 0.0103, 0.0957, -0.0372, 0.016, -0.017, -0.047, 0.0449, 0.0755, 0.0989, -0.0029, -0.0817, 0.0853, 0.0849, -0.0971, 0.0207, -0.0191, 0.0236, 0.0081, -0.1001, -0.0352, -0.0245, 0.0831, -0.1582, 0.0447, -0.0518, -0.1466, 0.0308, -0.0294, -0.0968, -0.072, -0.0259, -0.021, -0.0343, -0.125, -0.0469, -0.1026, -0.0133, -0.0746, -0.1007, -0.11, -0.0049, 0.0254, -0.1349, -0.035, 0.0931, 0.1222, -0.0288, -0.0178, 0.044, -0.0158, 0.1831, -0.0242, 0.0333, 0.0128, 0.1491, -0.051, -0.108, 0.0873, 
0.0556]))]\n",
      "+-------------------+--------------------+\n",
      "|                 id|       articlevector|\n",
      "+-------------------+--------------------+\n",
      "|4784374257956159489|[-0.2495005178127...|\n",
      "|4784714419076595713|[-0.3655561696666...|\n",
      "|4784374257670946817|[0.09024319180070...|\n",
      "|4784374253984153601|[0.05539106224961...|\n",
      "|4784714422524313601|[0.05438648855485...|\n",
      "|4784714419198230529|[0.12794039475042...|\n",
      "|4784374250708402177|[0.11205882283933...|\n",
      "|4784714419248562177|[-0.2451529804237...|\n",
      "|4784374254651047937|[0.01860215120210...|\n",
      "|4784374255355691009|[0.10731627750794...|\n",
      "|4784714421907750913|[3.10829261504108...|\n",
      "|4784374252839108609|[-0.0092851629627...|\n",
      "|4784714421161164801|[-1.5821069749072...|\n",
      "|4784374254663630849|[0.07389197451048...|\n",
      "|4784374250741956609|[-0.0460137771542...|\n",
      "|4784374257259905025|[0.05009001632019...|\n",
      "|4784374257335402497|[-0.0113726751036...|\n",
      "|4784374257087938561|[0.14660846810662...|\n",
      "|4784714421467348993|[-0.0129459188759...|\n",
      "|4784714421203107841|[-0.0136806867973...|\n",
      "+-------------------+--------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "article_vector_e = article_keyword_vectors_e.rdd.map(compute_avg_vectors).toDF(['id', 'articlevector'])\n",
    "print(\"文章最终vector\",article_vector_e.take(10))\n",
    "article_vector_e.rdd.toDF(['id', 'articlevector']).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Stage 136:===================================================> (194 + 2) / 200]\r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+--------------------+-------------------+\n",
      "|            datasetA|            datasetB|  EuclideanDistance|\n",
      "+--------------------+--------------------+-------------------+\n",
      "|{4784374253984153...|{4785099262440505...| 1.3590554503428725|\n",
      "|{4784374252839108...|{4785099261291266...| 0.2701617548392899|\n",
      "|{4784374252839108...|{4785099261299654...|0.20907640945550027|\n",
      "|{4784374257335402...|{4785099261974937...|0.37564831225368117|\n",
      "|{4784374257335402...|{4785099262096572...| 0.6739678996437154|\n",
      "|{4784714421467348...|{4785099262549557...|0.47656323850803917|\n",
      "|{4784714421467348...|{4785099262419533...| 0.6641122755339596|\n",
      "|{4784374255800287...|{4785099261354180...|   0.38635520553653|\n",
      "|{4784374256408461...|{4785099261186408...| 0.5686132837226386|\n",
      "|{4784374257347985...|{4785099261798776...| 0.3497807930605667|\n",
      "|{4784374254453915...|{4785099262725718...| 0.5390951375461235|\n",
      "|{4784714421945499...|{4785099261291266...| 0.8950725343497655|\n",
      "|{4784714421945499...|{4785099261878468...| 1.4268566390704862|\n",
      "|{4784374256802725...|{4785099261052190...| 0.8610976386254321|\n",
      "|{4784374256802725...|{4785099262616666...|0.29344863015060746|\n",
      "|{4784374253619249...|{4785099262209818...|0.42704094175837026|\n",
      "|{4784714422171992...|{4785099261148659...| 0.2555356593870808|\n",
      "|{4784714422171992...|{4785099262281121...| 0.6901349538865608|\n",
      "|{4784374257398317...|{4785099262725718...| 0.7441993592115511|\n",
      "|{4784714422297821...|{4785099261698113...| 1.4586034359766393|\n",
      "+--------------------+--------------------+-------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                \r"
     ]
    }
   ],
   "source": [
    "# LSH \n",
    "train_e = article_vector_e.select(['id', 'articlevector'])\n",
    "train_s = article_vector_s.select(['id', 'articlevector'])\n",
    " \n",
    "# 1.BucketedRandomProjectionLSH\n",
    "from pyspark.ml.feature import BucketedRandomProjectionLSHModel\n",
    "\n",
    "LSHmodel = BucketedRandomProjectionLSHModel.load(\"models/LSH.model\")\n",
    " \n",
    "similar = LSHmodel.approxSimilarityJoin(train_e, train_s, 2.0, distCol='EuclideanDistance')\n",
    "similar.show()\n",
    "\n",
    "# 2.MinHashLSH\n",
    "# brp = MinHashLSH(inputCol='articlevector', outputCol='hashes', numHashTables=4.0)\n",
    "# model = brp.fit(train)\n",
    " \n",
    "# 获取所有相似对\n",
    "# similar = model.approxSimilarityJoin(train, train, 2.0, distCol='EuclideanDistance')\n",
    "# similar.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                \r"
     ]
    },
    {
     "data": {
      "text/plain": [
       "111154"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "similar.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                \r"
     ]
    }
   ],
   "source": [
    "similar_list =  similar.collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'datasetA': Row(id='4784374253984153601', articlevector=DenseVector([0.0554, 0.0761, 0.0427, 0.0182, 0.0056, 0.021, 0.0416, -0.0371, -0.0378, -0.0635, -0.004, 0.0355, 0.0387, 0.0624, -0.0239, -0.0454, 0.0763, 0.0391, -0.0214, 0.0255, 0.0153, 0.0066, 0.0017, -0.0389, -0.0282, -0.0025, 0.0603, -0.0679, 0.0041, -0.0246, -0.0048, 0.0115, 0.0167, -0.0555, -0.0099, -0.0283, -0.0021, 0.0095, -0.0468, -0.0279, -0.0355, -0.0123, -0.0066, -0.0409, -0.0161, 0.0023, -0.0106, -0.0516, 0.0241, 0.0451, 0.045, -0.045, -0.0052, -0.0051, 0.0035, 0.0948, -0.0248, 0.0096, 0.0123, 0.0492, -0.0433, -0.0466, 0.0205, 0.0381]), hashes=[DenseVector([-1.0]), DenseVector([-1.0]), DenseVector([-1.0]), DenseVector([0.0])]),\n",
       " 'datasetB': Row(id='4785099262440505345', articlevector=DenseVector([0.1802, -0.1482, -0.0932, 0.3011, -0.0904, -0.0567, 0.1779, 0.1186, -0.3695, -0.1019, 0.0551, -0.0875, 0.2452, 0.2924, -0.0317, -0.4299, 0.404, 0.12, -0.4802, 0.1027, 0.311, 0.2168, -0.0141, -0.2729, -0.1762, -0.027, 0.0895, -0.2051, -0.1148, 0.1028, 0.1817, 0.3072, -0.0106, 0.0347, 0.1303, 0.097, 0.1399, 0.0466, -0.0582, 0.0867, 0.0605, 0.1139, -0.0746, 0.0522, 0.1847, 0.0634, 0.1385, 0.0624, -0.0135, 0.0429, 0.0276, -0.3183, -0.3628, -0.0288, -0.0827, 0.1443, 0.059, -0.0671, -0.0233, 0.2799, -0.1988, 0.0177, -0.1315, 0.1324]), hashes=[DenseVector([-1.0]), DenseVector([-1.0]), DenseVector([-1.0]), DenseVector([-1.0])]),\n",
       " 'EuclideanDistance': 1.3590554503428725}"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "similar_list[0].asDict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'datasetA': '4784374253984153601',\n",
       "  'datasetB': '4785099262440505345',\n",
       "  'EuclideanDistance': 1.3590554503428725},\n",
       " {'datasetA': '4784374252839108609',\n",
       "  'datasetB': '4785099261291266049',\n",
       "  'EuclideanDistance': 0.2701617548392899},\n",
       " {'datasetA': '4784374252839108609',\n",
       "  'datasetB': '4785099261299654657',\n",
       "  'EuclideanDistance': 0.20907640945550027},\n",
       " {'datasetA': '4784374257335402497',\n",
       "  'datasetB': '4785099261974937601',\n",
       "  'EuclideanDistance': 0.37564831225368117},\n",
       " {'datasetA': '4784374257335402497',\n",
       "  'datasetB': '4785099262096572417',\n",
       "  'EuclideanDistance': 0.6739678996437154},\n",
       " {'datasetA': '4784714421467348993',\n",
       "  'datasetB': '4785099262549557249',\n",
       "  'EuclideanDistance': 0.47656323850803917},\n",
       " {'datasetA': '4784714421467348993',\n",
       "  'datasetB': '4785099262419533825',\n",
       "  'EuclideanDistance': 0.6641122755339596},\n",
       " {'datasetA': '4784374255800287233',\n",
       "  'datasetB': '4785099261354180609',\n",
       "  'EuclideanDistance': 0.38635520553653},\n",
       " {'datasetA': '4784374256408461313',\n",
       "  'datasetB': '4785099261186408449',\n",
       "  'EuclideanDistance': 0.5686132837226386},\n",
       " {'datasetA': '4784374257347985409',\n",
       "  'datasetB': '4785099261798776833',\n",
       "  'EuclideanDistance': 0.3497807930605667},\n",
       " {'datasetA': '4784374254453915649',\n",
       "  'datasetB': '4785099262725718017',\n",
       "  'EuclideanDistance': 0.5390951375461235},\n",
       " {'datasetA': '4784714421945499649',\n",
       "  'datasetB': '4785099261291266049',\n",
       "  'EuclideanDistance': 0.8950725343497655},\n",
       " {'datasetA': '4784714421945499649',\n",
       "  'datasetB': '4785099261878468609',\n",
       "  'EuclideanDistance': 1.4268566390704862},\n",
       " {'datasetA': '4784374256802725889',\n",
       "  'datasetB': '4785099261052190721',\n",
       "  'EuclideanDistance': 0.8610976386254321},\n",
       " {'datasetA': '4784374256802725889',\n",
       "  'datasetB': '4785099262616666113',\n",
       "  'EuclideanDistance': 0.29344863015060746},\n",
       " {'datasetA': '4784374253619249153',\n",
       "  'datasetB': '4785099262209818625',\n",
       "  'EuclideanDistance': 0.42704094175837026},\n",
       " {'datasetA': '4784714422171992065',\n",
       "  'datasetB': '4785099261148659713',\n",
       "  'EuclideanDistance': 0.2555356593870808},\n",
       " {'datasetA': '4784714422171992065',\n",
       "  'datasetB': '4785099262281121793',\n",
       "  'EuclideanDistance': 0.6901349538865608},\n",
       " {'datasetA': '4784374257398317057',\n",
       "  'datasetB': '4785099262725718017',\n",
       "  'EuclideanDistance': 0.7441993592115511},\n",
       " {'datasetA': '4784714422297821185',\n",
       "  'datasetB': '4785099261698113537',\n",
       "  'EuclideanDistance': 1.4586034359766393},\n",
       " {'datasetA': '4784374254273560577',\n",
       "  'datasetB': '4785099262327259137',\n",
       "  'EuclideanDistance': 0.79736548400482},\n",
       " {'datasetA': '4784374254273560577',\n",
       "  'datasetB': '4785099261433872385',\n",
       "  'EuclideanDistance': 0.5702801713959172},\n",
       " {'datasetA': '4784374256974692353',\n",
       "  'datasetB': '4785099262520197121',\n",
       "  'EuclideanDistance': 1.3103962351322638},\n",
       " {'datasetA': '4784374256693673985',\n",
       "  'datasetB': '4785099261698113537',\n",
       "  'EuclideanDistance': 0.4354774114711241},\n",
       " {'datasetA': '4784374256693673985',\n",
       "  'datasetB': '4785099261249323009',\n",
       "  'EuclideanDistance': 0.7805009833738955},\n",
       " {'datasetA': '4784374257452843009',\n",
       "  'datasetB': '4785099262318870529',\n",
       "  'EuclideanDistance': 0.8340972751648859},\n",
       " {'datasetA': '4784374258224594945',\n",
       "  'datasetB': '4785099261584867329',\n",
       "  'EuclideanDistance': 0.40999288216517277},\n",
       " {'datasetA': '4784374256345546753',\n",
       "  'datasetB': '4785099262193041409',\n",
       "  'EuclideanDistance': 0.6100699155793634},\n",
       " {'datasetA': '4784374256345546753',\n",
       "  'datasetB': '4785099261354180609',\n",
       "  'EuclideanDistance': 0.27136935552856645},\n",
       " {'datasetA': '4784374256345546753',\n",
       "  'datasetB': '4785099262247567361',\n",
       "  'EuclideanDistance': 0.43606877134003247},\n",
       " {'datasetA': '4784374250758733825',\n",
       "  'datasetB': '4785099262088183809',\n",
       "  'EuclideanDistance': 0.4858787353341942},\n",
       " {'datasetA': '4784374256232300545',\n",
       "  'datasetB': '4785099262121738241',\n",
       "  'EuclideanDistance': 0.44830061712770436},\n",
       " {'datasetA': '4784374256232300545',\n",
       "  'datasetB': '4785099262541168641',\n",
       "  'EuclideanDistance': 0.9359389160806235},\n",
       " {'datasetA': '4784374251400462337',\n",
       "  'datasetB': '4785099261815554049',\n",
       "  'EuclideanDistance': 0.44566652373431187},\n",
       " {'datasetA': '4784714419215007745',\n",
       "  'datasetB': '4785099261928800257',\n",
       "  'EuclideanDistance': 0.5438736200948405},\n",
       " {'datasetA': '4784714419215007745',\n",
       "  'datasetB': '4785099261073162241',\n",
       "  'EuclideanDistance': 0.32990328949375586},\n",
       " {'datasetA': '4784714422268461057',\n",
       "  'datasetB': '4785099262541168641',\n",
       "  'EuclideanDistance': 1.0458104333552893},\n",
       " {'datasetA': '4784374250934894593',\n",
       "  'datasetB': '4785099261480009729',\n",
       "  'EuclideanDistance': 0.914115570968005},\n",
       " {'datasetA': '4784374250934894593',\n",
       "  'datasetB': '4785099262419533825',\n",
       "  'EuclideanDistance': 0.5924309355131917},\n",
       " {'datasetA': '4784374250934894593',\n",
       "  'datasetB': '4785099262130126849',\n",
       "  'EuclideanDistance': 0.43191280372167656},\n",
       " {'datasetA': '4784714421765144577',\n",
       "  'datasetB': '4785099261157048321',\n",
       "  'EuclideanDistance': 0.3665790889402079},\n",
       " {'datasetA': '4784714419835764737',\n",
       "  'datasetB': '4785099262633443329',\n",
       "  'EuclideanDistance': 0.9405297186061876},\n",
       " {'datasetA': '4784714419835764737',\n",
       "  'datasetB': '4785099261593255937',\n",
       "  'EuclideanDistance': 0.2073781491975064},\n",
       " {'datasetA': '4784714421421211649',\n",
       "  'datasetB': '4785099261463232513',\n",
       "  'EuclideanDistance': 0.757803896630932},\n",
       " {'datasetA': '4784374253585694721',\n",
       "  'datasetB': '4785099262578917377',\n",
       "  'EuclideanDistance': 0.5324552905650621},\n",
       " {'datasetA': '4784374253585694721',\n",
       "  'datasetB': '4785099262201430017',\n",
       "  'EuclideanDistance': 0.7353935989054845},\n",
       " {'datasetA': '4784374253443088385',\n",
       "  'datasetB': '4785099262021074945',\n",
       "  'EuclideanDistance': 0.8814958418538353},\n",
       " {'datasetA': '4784374253443088385',\n",
       "  'datasetB': '4785099261433872385',\n",
       "  'EuclideanDistance': 0.49360385124405387},\n",
       " {'datasetA': '4784374251475959809',\n",
       "  'datasetB': '4785099262641831937',\n",
       "  'EuclideanDistance': 0.31365643023725626},\n",
       " {'datasetA': '4784714422381707265',\n",
       "  'datasetB': '4785099261974937601',\n",
       "  'EuclideanDistance': 0.7104512508784435},\n",
       " {'datasetA': '4784714419642826753',\n",
       "  'datasetB': '4785099261790388225',\n",
       "  'EuclideanDistance': 0.4682838844785343},\n",
       " {'datasetA': '4784374253711523841',\n",
       "  'datasetB': '4785099262595694593',\n",
       "  'EuclideanDistance': 0.40776579063025503},\n",
       " {'datasetA': '4784374250796482561',\n",
       "  'datasetB': '4785099261480009729',\n",
       "  'EuclideanDistance': 0.9396246901740717},\n",
       " {'datasetA': '4784374256291020801',\n",
       "  'datasetB': '4785099262138515457',\n",
       "  'EuclideanDistance': 0.5816852747987974},\n",
       " {'datasetA': '4784374256291020801',\n",
       "  'datasetB': '4785099262155292673',\n",
       "  'EuclideanDistance': 0.6352294328300437},\n",
       " {'datasetA': '4784374250767122433',\n",
       "  'datasetB': '4785099262096572417',\n",
       "  'EuclideanDistance': 0.8316398822335658},\n",
       " {'datasetA': '4784374256416849921',\n",
       "  'datasetB': '4785099262578917377',\n",
       "  'EuclideanDistance': 0.638051227984896},\n",
       " {'datasetA': '4784374256416849921',\n",
       "  'datasetB': '4785099261064773633',\n",
       "  'EuclideanDistance': 0.735494180787829},\n",
       " {'datasetA': '4784374256416849921',\n",
       "  'datasetB': '4785099262486642689',\n",
       "  'EuclideanDistance': 0.28226665127165557},\n",
       " {'datasetA': '4784374256416849921',\n",
       "  'datasetB': '4785099262562140161',\n",
       "  'EuclideanDistance': 0.7455424569702295},\n",
       " {'datasetA': '4784714422046162945',\n",
       "  'datasetB': '4785099262595694593',\n",
       "  'EuclideanDistance': 0.33348244813875416},\n",
       " {'datasetA': '4784714422046162945',\n",
       "  'datasetB': '4785099261480009729',\n",
       "  'EuclideanDistance': 0.9504366654740836},\n",
       " {'datasetA': '4784374253128515585',\n",
       "  'datasetB': '4785099261966548993',\n",
       "  'EuclideanDistance': 0.4286445709933437},\n",
       " {'datasetA': '4784374258245566465',\n",
       "  'datasetB': '4785099262713135105',\n",
       "  'EuclideanDistance': 0.3687085154719663},\n",
       " {'datasetA': '4784374251132026881',\n",
       "  'datasetB': '4785099262264344577',\n",
       "  'EuclideanDistance': 1.0826668913938868},\n",
       " {'datasetA': '4784714422314598401',\n",
       "  'datasetB': '4785099262272733185',\n",
       "  'EuclideanDistance': 0.7012653876132335},\n",
       " {'datasetA': '4784714421819670529',\n",
       "  'datasetB': '4785099262499225601',\n",
       "  'EuclideanDistance': 0.5170914845690996},\n",
       " {'datasetA': '4784374257901633537',\n",
       "  'datasetB': '4785099262264344577',\n",
       "  'EuclideanDistance': 0.6801859655343445},\n",
       " {'datasetA': '4784374257901633537',\n",
       "  'datasetB': '4785099261031219201',\n",
       "  'EuclideanDistance': 0.4191047669630011},\n",
       " {'datasetA': '4784374257901633537',\n",
       "  'datasetB': '4785099261442260993',\n",
       "  'EuclideanDistance': 0.5765669896649943},\n",
       " {'datasetA': '4784374253661192193',\n",
       "  'datasetB': '4785099261064773633',\n",
       "  'EuclideanDistance': 0.34763123377454164},\n",
       " {'datasetA': '4784374257863884801',\n",
       "  'datasetB': '4785099262209818625',\n",
       "  'EuclideanDistance': 0.3277708992633554},\n",
       " {'datasetA': '4784374255611543553',\n",
       "  'datasetB': '4785099262193041409',\n",
       "  'EuclideanDistance': 1.1104520020613278},\n",
       " {'datasetA': '4784714419315671041',\n",
       "  'datasetB': '4785099261425483777',\n",
       "  'EuclideanDistance': 0.31018591092964104},\n",
       " {'datasetA': '4784714419525386241',\n",
       "  'datasetB': '4785099262440505345',\n",
       "  'EuclideanDistance': 1.606821330622186},\n",
       " {'datasetA': '4784374254684602369',\n",
       "  'datasetB': '4785099261916217345',\n",
       "  'EuclideanDistance': 0.31009821660492803},\n",
       " {'datasetA': '4784374254684602369',\n",
       "  'datasetB': '4785099261593255937',\n",
       "  'EuclideanDistance': 0.21367126683370607},\n",
       " {'datasetA': '4784714421681258497',\n",
       "  'datasetB': '4785099262222401537',\n",
       "  'EuclideanDistance': 0.26401894101429646},\n",
       " {'datasetA': '4784714422239100929',\n",
       "  'datasetB': '4785099262549557249',\n",
       "  'EuclideanDistance': 0.3391642797587199},\n",
       " {'datasetA': '4784374253191430145',\n",
       "  'datasetB': '4785099261064773633',\n",
       "  'EuclideanDistance': 0.7454908614750794},\n",
       " {'datasetA': '4784374253191430145',\n",
       "  'datasetB': '4785099262541168641',\n",
       "  'EuclideanDistance': 1.1717285771509247},\n",
       " {'datasetA': '4784374255099838465',\n",
       "  'datasetB': '4785099262578917377',\n",
       "  'EuclideanDistance': 0.40771189254734896},\n",
       " {'datasetA': '4784374255099838465',\n",
       "  'datasetB': '4785099261878468609',\n",
       "  'EuclideanDistance': 1.2477339337453885},\n",
       " {'datasetA': '4784374255099838465',\n",
       "  'datasetB': '4785099261186408449',\n",
       "  'EuclideanDistance': 0.38710402256307075},\n",
       " {'datasetA': '4784374251446599681',\n",
       "  'datasetB': '4785099262448893953',\n",
       "  'EuclideanDistance': 0.5862572269230933},\n",
       " {'datasetA': '4784374251446599681',\n",
       "  'datasetB': '4785099261249323009',\n",
       "  'EuclideanDistance': 0.31226305245144265},\n",
       " {'datasetA': '4784714419768655873',\n",
       "  'datasetB': '4785099261958160385',\n",
       "  'EuclideanDistance': 0.3542435022809351},\n",
       " {'datasetA': '4784714419122733057',\n",
       "  'datasetB': '4785099262222401537',\n",
       "  'EuclideanDistance': 0.848226225509285},\n",
       " {'datasetA': '4784714419122733057',\n",
       "  'datasetB': '4785099261433872385',\n",
       "  'EuclideanDistance': 0.839045748609284},\n",
       " {'datasetA': '4784714419667992577',\n",
       "  'datasetB': '4785099262461476865',\n",
       "  'EuclideanDistance': 0.6017606032891227},\n",
       " {'datasetA': '4784714419533774849',\n",
       "  'datasetB': '4785099262658609153',\n",
       "  'EuclideanDistance': 0.6650112719641094},\n",
       " {'datasetA': '4784374254495858689',\n",
       "  'datasetB': '4785099262155292673',\n",
       "  'EuclideanDistance': 0.6312689500056888},\n",
       " {'datasetA': '4784374254495858689',\n",
       "  'datasetB': '4785099261031219201',\n",
       "  'EuclideanDistance': 0.785401486489294},\n",
       " {'datasetA': '4784374256152608769',\n",
       "  'datasetB': '4785099261916217345',\n",
       "  'EuclideanDistance': 0.6197045171153905},\n",
       " {'datasetA': '4784374257050189825',\n",
       "  'datasetB': '4785099262419533825',\n",
       "  'EuclideanDistance': 0.8842423314216906},\n",
       " {'datasetA': '4784374254541996033',\n",
       "  'datasetB': '4785099261186408449',\n",
       "  'EuclideanDistance': 0.5856100660990076},\n",
       " {'datasetA': '4784714421962276865',\n",
       "  'datasetB': '4785099261798776833',\n",
       "  'EuclideanDistance': 0.2883587542109467},\n",
       " {'datasetA': '4784714421962276865',\n",
       "  'datasetB': '4785099262641831937',\n",
       "  'EuclideanDistance': 0.22148284989152323},\n",
       " {'datasetA': '4784714419235979265',\n",
       "  'datasetB': '4785099262578917377',\n",
       "  'EuclideanDistance': 0.5116985613698416},\n",
       " {'datasetA': '4784714419235979265',\n",
       "  'datasetB': '4785099262138515457',\n",
       "  'EuclideanDistance': 0.4837349551959451},\n",
       " {'datasetA': '4784714419462471681',\n",
       "  'datasetB': '4785099262201430017',\n",
       "  'EuclideanDistance': 0.4926897306122219},\n",
       " {'datasetA': '4784714421211496449',\n",
       "  'datasetB': '4785099261064773633',\n",
       "  'EuclideanDistance': 0.493948942384765},\n",
       " {'datasetA': '4784374255821258753',\n",
       "  'datasetB': '4785099262578917377',\n",
       "  'EuclideanDistance': 0.2892684038932555},\n",
       " {'datasetA': '4784374255821258753',\n",
       "  'datasetB': '4785099262042046465',\n",
       "  'EuclideanDistance': 0.3879554300027758},\n",
       " {'datasetA': '4784374257184407553',\n",
       "  'datasetB': '4785099261106716673',\n",
       "  'EuclideanDistance': 0.7485425356811922},\n",
       " {'datasetA': '4784374257184407553',\n",
       "  'datasetB': '4785099261756833793',\n",
       "  'EuclideanDistance': 0.480698876501826},\n",
       " {'datasetA': '4784374256597204993',\n",
       "  'datasetB': '4785099262146904065',\n",
       "  'EuclideanDistance': 0.9021283794011008},\n",
       " {'datasetA': '4784374256597204993',\n",
       "  'datasetB': '4785099262247567361',\n",
       "  'EuclideanDistance': 0.7333566589427818},\n",
       " {'datasetA': '4784714421991636993',\n",
       "  'datasetB': '4785099261836525569',\n",
       "  'EuclideanDistance': 0.35035248187757323},\n",
       " {'datasetA': '4784374253392756737',\n",
       "  'datasetB': '4785099261354180609',\n",
       "  'EuclideanDistance': 0.45944859267270666},\n",
       " {'datasetA': '4784374250951671809',\n",
       "  'datasetB': '4785099262570528769',\n",
       "  'EuclideanDistance': 0.3588972596467647},\n",
       " {'datasetA': '4784374253719912449',\n",
       "  'datasetB': '4785099262713135105',\n",
       "  'EuclideanDistance': 0.7469360084590697},\n",
       " {'datasetA': '4784374256219717633',\n",
       "  'datasetB': '4785099261157048321',\n",
       "  'EuclideanDistance': 0.27058988002605416},\n",
       " {'datasetA': '4784374255859007489',\n",
       "  'datasetB': '4785099261530341377',\n",
       "  'EuclideanDistance': 0.6959531218644536},\n",
       " {'datasetA': '4784374255653486593',\n",
       "  'datasetB': '4785099261622616065',\n",
       "  'EuclideanDistance': 0.301358300345827},\n",
       " {'datasetA': '4784374254474887169',\n",
       "  'datasetB': '4785099262608277505',\n",
       "  'EuclideanDistance': 0.7237327946035009},\n",
       " {'datasetA': '4784714422356541441',\n",
       "  'datasetB': '4785099262155292673',\n",
       "  'EuclideanDistance': 0.40849139764415493},\n",
       " {'datasetA': '4784714422356541441',\n",
       "  'datasetB': '4785099261995909121',\n",
       "  'EuclideanDistance': 0.3844959065465925},\n",
       " {'datasetA': '4784374251194941441',\n",
       "  'datasetB': '4785099261781999617',\n",
       "  'EuclideanDistance': 1.5215889510196003},\n",
       " {'datasetA': '4784374251194941441',\n",
       "  'datasetB': '4785099261115105281',\n",
       "  'EuclideanDistance': 1.4694760291462885},\n",
       " {'datasetA': '4784714421719007233',\n",
       "  'datasetB': '4785099261605838849',\n",
       "  'EuclideanDistance': 0.5059926030197228},\n",
       " {'datasetA': '4784714421719007233',\n",
       "  'datasetB': '4785099261631004673',\n",
       "  'EuclideanDistance': 0.2725179341014141},\n",
       " {'datasetA': '4784374257717084161',\n",
       "  'datasetB': '4785099262264344577',\n",
       "  'EuclideanDistance': 0.7013401834706477},\n",
       " {'datasetA': '4784374254223228929',\n",
       "  'datasetB': '4785099261756833793',\n",
       "  'EuclideanDistance': 0.34025990509235093},\n",
       " {'datasetA': '4784374256907583489',\n",
       "  'datasetB': '4785099261115105281',\n",
       "  'EuclideanDistance': 0.47047336291711533},\n",
       " {'datasetA': '4784714421899362305',\n",
       "  'datasetB': '4785099261115105281',\n",
       "  'EuclideanDistance': 0.7024428900446861},\n",
       " {'datasetA': '4784374258203623425',\n",
       "  'datasetB': '4785099262021074945',\n",
       "  'EuclideanDistance': 1.0925090114633187},\n",
       " {'datasetA': '4784374258203623425',\n",
       "  'datasetB': '4785099262633443329',\n",
       "  'EuclideanDistance': 0.9141059400837916},\n",
       " {'datasetA': '4784374254026096641',\n",
       "  'datasetB': '4785099262096572417',\n",
       "  'EuclideanDistance': 0.47940456198026626},\n",
       " {'datasetA': '4784374256647536641',\n",
       "  'datasetB': '4785099262658609153',\n",
       "  'EuclideanDistance': 0.9111926248150396},\n",
       " {'datasetA': '4784714419613466625',\n",
       "  'datasetB': '4785099262293704705',\n",
       "  'EuclideanDistance': 1.0185280227170956},\n",
       " {'datasetA': '4784714419059818497',\n",
       "  'datasetB': '4785099261052190721',\n",
       "  'EuclideanDistance': 0.48027685978878143},\n",
       " {'datasetA': '4784374254336475137',\n",
       "  'datasetB': '4785099262549557249',\n",
       "  'EuclideanDistance': 0.6491892934167074},\n",
       " {'datasetA': '4784374254336475137',\n",
       "  'datasetB': '4785099261261905921',\n",
       "  'EuclideanDistance': 0.4197637983102496},\n",
       " {'datasetA': '4784374256500736001',\n",
       "  'datasetB': '4785099262595694593',\n",
       "  'EuclideanDistance': 0.6529608786131114},\n",
       " {'datasetA': '4784714419131121665',\n",
       "  'datasetB': '4785099262650220545',\n",
       "  'EuclideanDistance': 0.5070127963961087},\n",
       " {'datasetA': '4784714419131121665',\n",
       "  'datasetB': '4785099262302093313',\n",
       "  'EuclideanDistance': 0.25983587586928125},\n",
       " {'datasetA': '4784374251295604737',\n",
       "  'datasetB': '4785099262687969281',\n",
       "  'EuclideanDistance': 1.6842308443588674},\n",
       " {'datasetA': '4784714422343958529',\n",
       "  'datasetB': '4785099261123493889',\n",
       "  'EuclideanDistance': 0.5422006791505977},\n",
       " {'datasetA': '4784374255754149889',\n",
       "  'datasetB': '4785099261291266049',\n",
       "  'EuclideanDistance': 0.5314401137127476},\n",
       " {'datasetA': '4784374255754149889',\n",
       "  'datasetB': '4785099261756833793',\n",
       "  'EuclideanDistance': 0.5858697362702304},\n",
       " {'datasetA': '4784374255754149889',\n",
       "  'datasetB': '4785099261995909121',\n",
       "  'EuclideanDistance': 0.4880602854451149},\n",
       " {'datasetA': '4784714419487637505',\n",
       "  'datasetB': '4785099261899440129',\n",
       "  'EuclideanDistance': 0.3854016021418696},\n",
       " {'datasetA': '4784714419143704577',\n",
       "  'datasetB': '4785099261958160385',\n",
       "  'EuclideanDistance': 0.4736398435394402},\n",
       " {'datasetA': '4784374255968059393',\n",
       "  'datasetB': '4785099262562140161',\n",
       "  'EuclideanDistance': 1.0230200033250647},\n",
       " {'datasetA': '4784714419370196993',\n",
       "  'datasetB': '4785099262448893953',\n",
       "  'EuclideanDistance': 0.5538517617079571},\n",
       " {'datasetA': '4784374258086182913',\n",
       "  'datasetB': '4785099261261905921',\n",
       "  'EuclideanDistance': 0.3740574426373558},\n",
       " {'datasetA': '4784374254864957441',\n",
       "  'datasetB': '4785099262570528769',\n",
       "  'EuclideanDistance': 0.7375337870961832},\n",
       " {'datasetA': '4784374257779998721',\n",
       "  'datasetB': '4785099262012686337',\n",
       "  'EuclideanDistance': 0.41372547962620565},\n",
       " {'datasetA': '4784374257779998721',\n",
       "  'datasetB': '4785099262725718017',\n",
       "  'EuclideanDistance': 0.5786140786967243},\n",
       " {'datasetA': '4784714422054551553',\n",
       "  'datasetB': '4785099261878468609',\n",
       "  'EuclideanDistance': 1.5938228165368242},\n",
       " {'datasetA': '4784374251530485761',\n",
       "  'datasetB': '4785099261463232513',\n",
       "  'EuclideanDistance': 1.2244072878179633},\n",
       " {'datasetA': '4784374255884173313',\n",
       "  'datasetB': '4785099262012686337',\n",
       "  'EuclideanDistance': 0.4019799098461568},\n",
       " {'datasetA': '4784714422536896513',\n",
       "  'datasetB': '4785099261631004673',\n",
       "  'EuclideanDistance': 1.6539564043923758},\n",
       " {'datasetA': '4784374256471375873',\n",
       "  'datasetB': '4785099261672947713',\n",
       "  'EuclideanDistance': 0.5929686499674509},\n",
       " {'datasetA': '4784374255896756225',\n",
       "  'datasetB': '4785099262578917377',\n",
       "  'EuclideanDistance': 0.6907518003295118},\n",
       " {'datasetA': '4784374255896756225',\n",
       "  'datasetB': '4785099261836525569',\n",
       "  'EuclideanDistance': 0.6111503856666642},\n",
       " {'datasetA': '4784374254718156801',\n",
       "  'datasetB': '4785099262671192065',\n",
       "  'EuclideanDistance': 1.09346457425845},\n",
       " {'datasetA': '4784374257998102529',\n",
       "  'datasetB': '4785099262201430017',\n",
       "  'EuclideanDistance': 0.9098586119638661},\n",
       " {'datasetA': '4784374254734934017',\n",
       "  'datasetB': '4785099262138515457',\n",
       "  'EuclideanDistance': 1.3879094388015036},\n",
       " {'datasetA': '4784374254005125121',\n",
       "  'datasetB': '4785099262671192065',\n",
       "  'EuclideanDistance': 1.0432014003455745},\n",
       " {'datasetA': '4784374254999175169',\n",
       "  'datasetB': '4785099262222401537',\n",
       "  'EuclideanDistance': 0.30956907768014563},\n",
       " {'datasetA': '4784374256962109441',\n",
       "  'datasetB': '4785099262578917377',\n",
       "  'EuclideanDistance': 0.5345915094523285},\n",
       " {'datasetA': '4784374254235811841',\n",
       "  'datasetB': '4785099262050435073',\n",
       "  'EuclideanDistance': 0.35149750907232535},\n",
       " {'datasetA': '4784374254328086529',\n",
       "  'datasetB': '4785099262339842049',\n",
       "  'EuclideanDistance': 0.40929371692853495},\n",
       " {'datasetA': '4784374254328086529',\n",
       "  'datasetB': '4785099262469865473',\n",
       "  'EuclideanDistance': 0.3989874495153708},\n",
       " {'datasetA': '4784714422478176257',\n",
       "  'datasetB': '4785099262469865473',\n",
       "  'EuclideanDistance': 0.596705069423061},\n",
       " {'datasetA': '4784714422478176257',\n",
       "  'datasetB': '4785099261643587585',\n",
       "  'EuclideanDistance': 1.1223816328075085},\n",
       " {'datasetA': '4784374255422799873',\n",
       "  'datasetB': '4785099262050435073',\n",
       "  'EuclideanDistance': 0.2853597000290676},\n",
       " {'datasetA': '4784374256110665729',\n",
       "  'datasetB': '4785099262725718017',\n",
       "  'EuclideanDistance': 0.5671526056552585},\n",
       " {'datasetA': '4784374254755905537',\n",
       "  'datasetB': '4785099261836525569',\n",
       "  'EuclideanDistance': 0.33526817769105344},\n",
       " {'datasetA': '4784374254755905537',\n",
       "  'datasetB': '4785099262222401537',\n",
       "  'EuclideanDistance': 0.22856148697019157},\n",
       " {'datasetA': '4784714419844153345',\n",
       "  'datasetB': '4785099261291266049',\n",
       "  'EuclideanDistance': 0.26584578763275857},\n",
       " {'datasetA': '4784714419844153345',\n",
       "  'datasetB': '4785099262021074945',\n",
       "  'EuclideanDistance': 0.9105780565322527},\n",
       " {'datasetA': '4784714419844153345',\n",
       "  'datasetB': '4785099262440505345',\n",
       "  'EuclideanDistance': 1.2723067261633747},\n",
       " {'datasetA': '4784714422083911681',\n",
       "  'datasetB': '4785099261790388225',\n",
       "  'EuclideanDistance': 1.0924208003554106},\n",
       " {'datasetA': '4784714422083911681',\n",
       "  'datasetB': '4785099261488398337',\n",
       "  'EuclideanDistance': 1.2473820120623298},\n",
       " {'datasetA': '4784374254638465025',\n",
       "  'datasetB': '4785099262050435073',\n",
       "  'EuclideanDistance': 0.354426429148009},\n",
       " {'datasetA': '4784374251245273089',\n",
       "  'datasetB': '4785099261899440129',\n",
       "  'EuclideanDistance': 0.8614369142790579},\n",
       " {'datasetA': '4784374254055456769',\n",
       "  'datasetB': '4785099262042046465',\n",
       "  'EuclideanDistance': 0.5270623002350151},\n",
       " {'datasetA': '4784374254055456769',\n",
       "  'datasetB': '4785099261219962881',\n",
       "  'EuclideanDistance': 1.816062745667093},\n",
       " {'datasetA': '4784374257620615169',\n",
       "  'datasetB': '4785099262167875585',\n",
       "  'EuclideanDistance': 0.23086808980203702},\n",
       " {'datasetA': '4784374255569600513',\n",
       "  'datasetB': '4785099262264344577',\n",
       "  'EuclideanDistance': 0.4697477023050488},\n",
       " {'datasetA': '4784714418980126721',\n",
       "  'datasetB': '4785099262461476865',\n",
       "  'EuclideanDistance': 0.6581548355295932},\n",
       " {'datasetA': '4784374252990103553',\n",
       "  'datasetB': '4785099261106716673',\n",
       "  'EuclideanDistance': 0.40441518667735377},\n",
       " {'datasetA': '4784714419386974209',\n",
       "  'datasetB': '4785099261094133761',\n",
       "  'EuclideanDistance': 0.6996822625924595},\n",
       " {'datasetA': '4784714419386974209',\n",
       "  'datasetB': '4785099261249323009',\n",
       "  'EuclideanDistance': 0.27944908132104895},\n",
       " {'datasetA': '4784714419177259009',\n",
       "  'datasetB': '4785099262155292673',\n",
       "  'EuclideanDistance': 0.3790560677745398},\n",
       " {'datasetA': '4784374254697185281',\n",
       "  'datasetB': '4785099262633443329',\n",
       "  'EuclideanDistance': 0.8761609863501495},\n",
       " {'datasetA': '4784374255917727745',\n",
       "  'datasetB': '4785099262528585729',\n",
       "  'EuclideanDistance': 0.5983261121368854},\n",
       " {'datasetA': '4784374251228495873',\n",
       "  'datasetB': '4785099261219962881',\n",
       "  'EuclideanDistance': 1.818002555561466},\n",
       " {'datasetA': '4784714419206619137',\n",
       "  'datasetB': '4785099261341597697',\n",
       "  'EuclideanDistance': 0.42041868618472367},\n",
       " {'datasetA': '4784714422373318657',\n",
       "  'datasetB': '4785099262528585729',\n",
       "  'EuclideanDistance': 0.6208622159138959},\n",
       " {'datasetA': '4784714422121660417',\n",
       "  'datasetB': '4785099261631004673',\n",
       "  'EuclideanDistance': 0.4979428250441535},\n",
       " {'datasetA': '4784714422121660417',\n",
       "  'datasetB': '4785099261593255937',\n",
       "  'EuclideanDistance': 0.4275511233767359},\n",
       " {'datasetA': '4784374251664703489',\n",
       "  'datasetB': '4785099262725718017',\n",
       "  'EuclideanDistance': 1.7851117730495087},\n",
       " {'datasetA': '4784374251664703489',\n",
       "  'datasetB': '4785099262616666113',\n",
       "  'EuclideanDistance': 1.4803761794723802},\n",
       " {'datasetA': '4784374251664703489',\n",
       "  'datasetB': '4785099262104961025',\n",
       "  'EuclideanDistance': 1.7878499905308352},\n",
       " {'datasetA': '4784374255015952385',\n",
       "  'datasetB': '4785099262209818625',\n",
       "  'EuclideanDistance': 0.8438345672118632},\n",
       " {'datasetA': '4784374255015952385',\n",
       "  'datasetB': '4785099261714890753',\n",
       "  'EuclideanDistance': 0.5941361410840921},\n",
       " {'datasetA': '4784374251119443969',\n",
       "  'datasetB': '4785099262272733185',\n",
       "  'EuclideanDistance': 0.7653789068655902},\n",
       " {'datasetA': '4784374251119443969',\n",
       "  'datasetB': '4785099262578917377',\n",
       "  'EuclideanDistance': 0.39240451562845363},\n",
       " {'datasetA': '4784374257788387329',\n",
       "  'datasetB': '4785099261815554049',\n",
       "  'EuclideanDistance': 0.5643513346445346},\n",
       " {'datasetA': '4784374255284387841',\n",
       "  'datasetB': '4785099261928800257',\n",
       "  'EuclideanDistance': 1.003245544994001},\n",
       " {'datasetA': '4784374255284387841',\n",
       "  'datasetB': '4785099262377590785',\n",
       "  'EuclideanDistance': 1.4317642788427962},\n",
       " {'datasetA': '4784374253816381441',\n",
       "  'datasetB': '4785099261781999617',\n",
       "  'EuclideanDistance': 0.3698814372926984},\n",
       " {'datasetA': '4784374256869834753',\n",
       "  'datasetB': '4785099261442260993',\n",
       "  'EuclideanDistance': 0.6585362002832229},\n",
       " {'datasetA': '4784374254424555521',\n",
       "  'datasetB': '4785099261219962881',\n",
       "  'EuclideanDistance': 1.8831243837361848},\n",
       " {'datasetA': '4784374256173580289',\n",
       "  'datasetB': '4785099261094133761',\n",
       "  'EuclideanDistance': 0.47924792001253613},\n",
       " {'datasetA': '4784374254890123265',\n",
       "  'datasetB': '4785099261115105281',\n",
       "  'EuclideanDistance': 0.43894578250417177},\n",
       " {'datasetA': '4784374256987275265',\n",
       "  'datasetB': '4785099261907828737',\n",
       "  'EuclideanDistance': 0.5856118211775044},\n",
       " {'datasetA': '4784374256030973953',\n",
       "  'datasetB': '4785099261878468609',\n",
       "  'EuclideanDistance': 1.2715698811473792},\n",
       " {'datasetA': '4784374256030973953',\n",
       "  'datasetB': '4785099261425483777',\n",
       "  'EuclideanDistance': 1.579926324266388},\n",
       " {'datasetA': '4784374257322819585',\n",
       "  'datasetB': '4785099262507614209',\n",
       "  'EuclideanDistance': 0.5993976331036301},\n",
       " {'datasetA': '4784374257322819585',\n",
       "  'datasetB': '4785099261966548993',\n",
       "  'EuclideanDistance': 0.29044861877240663},\n",
       " {'datasetA': '4784374257322819585',\n",
       "  'datasetB': '4785099261073162241',\n",
       "  'EuclideanDistance': 0.3059533243382302},\n",
       " {'datasetA': '4784374254583939073',\n",
       "  'datasetB': '4785099261115105281',\n",
       "  'EuclideanDistance': 0.7025289322353157},\n",
       " {'datasetA': '4784374253803798529',\n",
       "  'datasetB': '4785099262012686337',\n",
       "  'EuclideanDistance': 0.4736155548878554},\n",
       " {'datasetA': '4784374254109982721',\n",
       "  'datasetB': '4785099262616666113',\n",
       "  'EuclideanDistance': 0.5218618959106159},\n",
       " {'datasetA': '4784374254109982721',\n",
       "  'datasetB': '4785099261727473665',\n",
       "  'EuclideanDistance': 0.4507810674798911},\n",
       " {'datasetA': '4784374253430505473',\n",
       "  'datasetB': '4785099261631004673',\n",
       "  'EuclideanDistance': 0.3564938974263931},\n",
       " {'datasetA': '4784374255414411265',\n",
       "  'datasetB': '4785099261643587585',\n",
       "  'EuclideanDistance': 0.5121953237716897},\n",
       " {'datasetA': '4784374255414411265',\n",
       "  'datasetB': '4785099261727473665',\n",
       "  'EuclideanDistance': 0.5279834794067336},\n",
       " {'datasetA': '4784374257285070849',\n",
       "  'datasetB': '4785099261043802113',\n",
       "  'EuclideanDistance': 0.4289873687046821},\n",
       " {'datasetA': '4784374253224984577',\n",
       "  'datasetB': '4785099262528585729',\n",
       "  'EuclideanDistance': 0.4906365997726927},\n",
       " {'datasetA': '4784374254533607425',\n",
       "  'datasetB': '4785099262520197121',\n",
       "  'EuclideanDistance': 1.1782292799724388},\n",
       " {'datasetA': '4784374257893244929',\n",
       "  'datasetB': '4785099262650220545',\n",
       "  'EuclideanDistance': 0.7458091875177412},\n",
       " {'datasetA': '4784374256848863233',\n",
       "  'datasetB': '4785099262146904065',\n",
       "  'EuclideanDistance': 0.4503850782968198},\n",
       " {'datasetA': '4784374254827208705',\n",
       "  'datasetB': '4785099261907828737',\n",
       "  'EuclideanDistance': 1.9444981747897896},\n",
       " {'datasetA': '4784374254827208705',\n",
       "  'datasetB': '4785099262104961025',\n",
       "  'EuclideanDistance': 1.9243612273689903},\n",
       " {'datasetA': '4784374254625882113',\n",
       "  'datasetB': '4785099262339842049',\n",
       "  'EuclideanDistance': 0.5497933913950062},\n",
       " {'datasetA': '4784374255225667585',\n",
       "  'datasetB': '4785099262113349633',\n",
       "  'EuclideanDistance': 1.9093304949535572},\n",
       " {'datasetA': '4784374255225667585',\n",
       "  'datasetB': '4785099261941383169',\n",
       "  'EuclideanDistance': 0.552828317862403},\n",
       " {'datasetA': '4784374255603154945',\n",
       "  'datasetB': '4785099262193041409',\n",
       "  'EuclideanDistance': 0.5859168954238315},\n",
       " {'datasetA': '4784374255603154945',\n",
       "  'datasetB': '4785099262658609153',\n",
       "  'EuclideanDistance': 0.7133634609604353},\n",
       " {'datasetA': '4784374255603154945',\n",
       "  'datasetB': '4785099262096572417',\n",
       "  'EuclideanDistance': 0.671202143890899},\n",
       " {'datasetA': '4784374256840474625',\n",
       "  'datasetB': '4785099262293704705',\n",
       "  'EuclideanDistance': 1.2411682272008866},\n",
       " {'datasetA': '4784714421802893313',\n",
       "  'datasetB': '4785099262176264193',\n",
       "  'EuclideanDistance': 0.6540569131914372},\n",
       " {'datasetA': '4784374253929627649',\n",
       "  'datasetB': '4785099262520197121',\n",
       "  'EuclideanDistance': 1.2293935312520055},\n",
       " {'datasetA': '4784374253522780161',\n",
       "  'datasetB': '4785099261899440129',\n",
       "  'EuclideanDistance': 0.40654972842190595},\n",
       " {'datasetA': '4784714419433111553',\n",
       "  'datasetB': '4785099261463232513',\n",
       "  'EuclideanDistance': 1.3145542753905843},\n",
       " {'datasetA': '4784714419433111553',\n",
       "  'datasetB': '4785099262658609153',\n",
       "  'EuclideanDistance': 1.0083868767656141},\n",
       " {'datasetA': '4784374253950599169',\n",
       "  'datasetB': '4785099261530341377',\n",
       "  'EuclideanDistance': 0.8921543315731756},\n",
       " {'datasetA': '4784374254957232129',\n",
       "  'datasetB': '4785099261714890753',\n",
       "  'EuclideanDistance': 0.48296002556129114},\n",
       " {'datasetA': '4784374257595449345',\n",
       "  'datasetB': '4785099262327259137',\n",
       "  'EuclideanDistance': 0.4805959697909093},\n",
       " {'datasetA': '4784374251513708545',\n",
       "  'datasetB': '4785099261312237569',\n",
       "  'EuclideanDistance': 1.6123326842233687},\n",
       " {'datasetA': '4784714419852541953',\n",
       "  'datasetB': '4785099262369202177',\n",
       "  'EuclideanDistance': 0.319122677586142},\n",
       " {'datasetA': '4784714422448816129',\n",
       "  'datasetB': '4785099262448893953',\n",
       "  'EuclideanDistance': 0.5410967953246666},\n",
       " {'datasetA': '4784374255762538497',\n",
       "  'datasetB': '4785099262658609153',\n",
       "  'EuclideanDistance': 0.6727128623944604},\n",
       " {'datasetA': '4784374255762538497',\n",
       "  'datasetB': '4785099261899440129',\n",
       "  'EuclideanDistance': 0.49411302162681325},\n",
       " {'datasetA': '4784374253094961153',\n",
       "  'datasetB': '4785099262650220545',\n",
       "  'EuclideanDistance': 0.8616363372886966},\n",
       " {'datasetA': '4784374256534290433',\n",
       "  'datasetB': '4785099262419533825',\n",
       "  'EuclideanDistance': 0.7409671606400089},\n",
       " {'datasetA': '4784714421735784449',\n",
       "  'datasetB': '4785099262608277505',\n",
       "  'EuclideanDistance': 0.5765008241999605},\n",
       " {'datasetA': '4784714419814793217',\n",
       "  'datasetB': '4785099262641831937',\n",
       "  'EuclideanDistance': 0.6605605478178206},\n",
       " {'datasetA': '4784714419705741313',\n",
       "  'datasetB': '4785099262130126849',\n",
       "  'EuclideanDistance': 1.6584657200963837},\n",
       " {'datasetA': '4784714422327181313',\n",
       "  'datasetB': '4785099262369202177',\n",
       "  'EuclideanDistance': 0.35346828454162776},\n",
       " {'datasetA': '4784714421488320513',\n",
       "  'datasetB': '4785099262687969281',\n",
       "  'EuclideanDistance': 0.2860582390523002},\n",
       " {'datasetA': '4784714421937111041',\n",
       "  'datasetB': '4785099261844914177',\n",
       "  'EuclideanDistance': 0.5757469498131724},\n",
       " {'datasetA': '4784374254403584001',\n",
       "  'datasetB': '4785099262193041409',\n",
       "  'EuclideanDistance': 0.6035445848956382},\n",
       " {'datasetA': '4784374257759027201',\n",
       "  'datasetB': '4785099261530341377',\n",
       "  'EuclideanDistance': 0.24131435640104876},\n",
       " {'datasetA': '4784374256010002433',\n",
       "  'datasetB': '4785099262608277505',\n",
       "  'EuclideanDistance': 0.5512904157570598},\n",
       " {'datasetA': '4784374255401828353',\n",
       "  'datasetB': '4785099261584867329',\n",
       "  'EuclideanDistance': 1.2838829613912932},\n",
       " {'datasetA': '4784714421391851521',\n",
       "  'datasetB': '4785099261312237569',\n",
       "  'EuclideanDistance': 0.6253952664895038},\n",
       " {'datasetA': '4784374253963182081',\n",
       "  'datasetB': '4785099261198991361',\n",
       "  'EuclideanDistance': 0.5869417478456487},\n",
       " {'datasetA': '4784714421651898369',\n",
       "  'datasetB': '4785099261198991361',\n",
       "  'EuclideanDistance': 0.5440915916736491},\n",
       " {'datasetA': '4784374257809358849',\n",
       "  'datasetB': '4785099261425483777',\n",
       "  'EuclideanDistance': 0.9360567581670781},\n",
       " {'datasetA': '4784374255460548609',\n",
       "  'datasetB': '4785099261043802113',\n",
       "  'EuclideanDistance': 0.44916727206029644},\n",
       " {'datasetA': '4784374255460548609',\n",
       "  'datasetB': '4785099262104961025',\n",
       "  'EuclideanDistance': 0.37800584133197374},\n",
       " {'datasetA': '4784374250855202817',\n",
       "  'datasetB': '4785099262042046465',\n",
       "  'EuclideanDistance': 0.41032031151795684},\n",
       " {'datasetA': '4784374256077111297',\n",
       "  'datasetB': '4785099262478254081',\n",
       "  'EuclideanDistance': 0.5131181612200975},\n",
       " {'datasetA': '4784374256760782849',\n",
       "  'datasetB': '4785099262658609153',\n",
       "  'EuclideanDistance': 0.6055265653771229},\n",
       " {'datasetA': '4784374255666069505',\n",
       "  'datasetB': '4785099261928800257',\n",
       "  'EuclideanDistance': 1.3556279867126566},\n",
       " {'datasetA': '4784374255666069505',\n",
       "  'datasetB': '4785099261584867329',\n",
       "  'EuclideanDistance': 1.6731869366742385},\n",
       " {'datasetA': '4784714421316354049',\n",
       "  'datasetB': '4785099261605838849',\n",
       "  'EuclideanDistance': 0.5655297483924058},\n",
       " {'datasetA': '4784714418950766593',\n",
       "  'datasetB': '4785099262209818625',\n",
       "  'EuclideanDistance': 1.0003758346378508},\n",
       " {'datasetA': '4784714421727395841',\n",
       "  'datasetB': '4785099261756833793',\n",
       "  'EuclideanDistance': 0.19750110776864296},\n",
       " {'datasetA': '4784714419554746369',\n",
       "  'datasetB': '4785099262021074945',\n",
       "  'EuclideanDistance': 1.2774356146268582},\n",
       " {'datasetA': '4784714419554746369',\n",
       "  'datasetB': '4785099262595694593',\n",
       "  'EuclideanDistance': 0.3805353822862633},\n",
       " {'datasetA': '4784714419751878657',\n",
       "  'datasetB': '4785099262071406593',\n",
       "  'EuclideanDistance': 0.47790501629056104},\n",
       " {'datasetA': '4784714419454083073',\n",
       "  'datasetB': '4785099261899440129',\n",
       "  'EuclideanDistance': 0.3134832651958034},\n",
       " {'datasetA': '4784374256857251841',\n",
       "  'datasetB': '4785099261064773633',\n",
       "  'EuclideanDistance': 0.48281929249383954},\n",
       " {'datasetA': '4784374251484348417',\n",
       "  'datasetB': '4785099262570528769',\n",
       "  'EuclideanDistance': 0.41905252444780294},\n",
       " {'datasetA': '4784374251484348417',\n",
       "  'datasetB': '4785099262247567361',\n",
       "  'EuclideanDistance': 0.4400566473078157},\n",
       " {'datasetA': '4784374256249077761',\n",
       "  'datasetB': '4785099262130126849',\n",
       "  'EuclideanDistance': 0.7730706627633516},\n",
       " {'datasetA': '4784714422230712321',\n",
       "  'datasetB': '4785099262671192065',\n",
       "  'EuclideanDistance': 1.044973710630487},\n",
       " {'datasetA': '4784374256555261953',\n",
       "  'datasetB': '4785099262687969281',\n",
       "  'EuclideanDistance': 0.8245148311995969},\n",
       " {'datasetA': '4784714419684769793',\n",
       "  'datasetB': '4785099262641831937',\n",
       "  'EuclideanDistance': 0.7073470398744092},\n",
       " {'datasetA': '4784714422075523073',\n",
       "  'datasetB': '4785099262209818625',\n",
       "  'EuclideanDistance': 0.5122154786921842},\n",
       " {'datasetA': '4784714422075523073',\n",
       "  'datasetB': '4785099262633443329',\n",
       "  'EuclideanDistance': 0.8675583237056643},\n",
       " {'datasetA': '4784714422075523073',\n",
       "  'datasetB': '4785099261593255937',\n",
       "  'EuclideanDistance': 0.21060657487321935},\n",
       " {'datasetA': '4784374251371102209',\n",
       "  'datasetB': '4785099262042046465',\n",
       "  'EuclideanDistance': 0.4497892240051952},\n",
       " {'datasetA': '4784374252788776961',\n",
       "  'datasetB': '4785099261530341377',\n",
       "  'EuclideanDistance': 0.4607924447652108},\n",
       " {'datasetA': '4784374254189674497',\n",
       "  'datasetB': '4785099261052190721',\n",
       "  'EuclideanDistance': 0.6580331989894402},\n",
       " {'datasetA': '4784714419793821697',\n",
       "  'datasetB': '4785099262121738241',\n",
       "  'EuclideanDistance': 0.5768745146416513},\n",
       " {'datasetA': '4784374256131637249',\n",
       "  'datasetB': '4785099261433872385',\n",
       "  'EuclideanDistance': 1.5303025302867914},\n",
       " {'datasetA': '4784714421689647105',\n",
       "  'datasetB': '4785099261790388225',\n",
       "  'EuclideanDistance': 0.3624555894059897},\n",
       " {'datasetA': '4784374255305359361',\n",
       "  'datasetB': '4785099262595694593',\n",
       "  'EuclideanDistance': 0.29895462840823084},\n",
       " {'datasetA': '4784374255305359361',\n",
       "  'datasetB': '4785099262671192065',\n",
       "  'EuclideanDistance': 0.5954458838797425},\n",
       " {'datasetA': '4784374257117298689',\n",
       "  'datasetB': '4785099262302093313',\n",
       "  'EuclideanDistance': 0.5618337671530235},\n",
       " {'datasetA': '4784374257117298689',\n",
       "  'datasetB': '4785099262570528769',\n",
       "  'EuclideanDistance': 0.8058595535901774},\n",
       " {'datasetA': '4784374253543751681',\n",
       "  'datasetB': '4785099262293704705',\n",
       "  'EuclideanDistance': 1.1737237628287844},\n",
       " {'datasetA': '4784374256723034113',\n",
       "  'datasetB': '4785099262448893953',\n",
       "  'EuclideanDistance': 1.6057149183917327},\n",
       " {'datasetA': '4784374256723034113',\n",
       "  'datasetB': '4785099261354180609',\n",
       "  'EuclideanDistance': 1.543256997854426},\n",
       " {'datasetA': '4784374256723034113',\n",
       "  'datasetB': '4785099262541168641',\n",
       "  'EuclideanDistance': 1.2751313595857838},\n",
       " {'datasetA': '4784374257062772737',\n",
       "  'datasetB': '4785099262616666113',\n",
       "  'EuclideanDistance': 0.3606492383671638},\n",
       " {'datasetA': '4784374253531168769',\n",
       "  'datasetB': '4785099262469865473',\n",
       "  'EuclideanDistance': 0.5224181733605677},\n",
       " {'datasetA': '4784714419114344449',\n",
       "  'datasetB': '4785099262058823681',\n",
       "  'EuclideanDistance': 0.32991536791082343},\n",
       " {'datasetA': '4784714419114344449',\n",
       "  'datasetB': '4785099262155292673',\n",
       "  'EuclideanDistance': 0.4735972239342633},\n",
       " {'datasetA': '4784374254042873857',\n",
       "  'datasetB': '4785099261899440129',\n",
       "  'EuclideanDistance': 0.5855006187188537},\n",
       " {'datasetA': '4784374256165191681',\n",
       "  'datasetB': '4785099261593255937',\n",
       "  'EuclideanDistance': 0.45710819495096616},\n",
       " {'datasetA': '4784374250788093953',\n",
       "  'datasetB': '4785099261299654657',\n",
       "  'EuclideanDistance': 0.8730519992125059},\n",
       " {'datasetA': '4784714419508609025',\n",
       "  'datasetB': '4785099262549557249',\n",
       "  'EuclideanDistance': 0.279185911012107},\n",
       " {'datasetA': '4784714419395362817',\n",
       "  'datasetB': '4785099262193041409',\n",
       "  'EuclideanDistance': 0.5056893795578213},\n",
       " {'datasetA': '4784714419579912193',\n",
       "  'datasetB': '4785099262222401537',\n",
       "  'EuclideanDistance': 0.7910078227912996},\n",
       " {'datasetA': '4784714421182136321',\n",
       "  'datasetB': '4785099261660364801',\n",
       "  'EuclideanDistance': 0.737514614254455},\n",
       " {'datasetA': '4784374251429822465',\n",
       "  'datasetB': '4785099261605838849',\n",
       "  'EuclideanDistance': 0.4873102493696334},\n",
       " {'datasetA': '4784714419269533697',\n",
       "  'datasetB': '4785099262520197121',\n",
       "  'EuclideanDistance': 1.1522115690222592},\n",
       " {'datasetA': '4784374255687041025',\n",
       "  'datasetB': '4785099262570528769',\n",
       "  'EuclideanDistance': 0.8720974333363688},\n",
       " {'datasetA': '4784374256744005633',\n",
       "  'datasetB': '4785099262096572417',\n",
       "  'EuclideanDistance': 0.7774646912934096},\n",
       " {'datasetA': '4784374256744005633',\n",
       "  'datasetB': '4785099262155292673',\n",
       "  'EuclideanDistance': 0.5362145740566507},\n",
       " {'datasetA': '4784374255783510017',\n",
       "  'datasetB': '4785099262650220545',\n",
       "  'EuclideanDistance': 0.49114099307390224},\n",
       " {'datasetA': '4784374255783510017',\n",
       "  'datasetB': '4785099262448893953',\n",
       "  'EuclideanDistance': 0.5560517002070352},\n",
       " {'datasetA': '4784714422222323713',\n",
       "  'datasetB': '4785099261698113537',\n",
       "  'EuclideanDistance': 0.9378457333622312},\n",
       " {'datasetA': '4784714422222323713',\n",
       "  'datasetB': '4785099262725718017',\n",
       "  'EuclideanDistance': 0.4210540107389703},\n",
       " {'datasetA': '4784714422222323713',\n",
       "  'datasetB': '4785099261442260993',\n",
       "  'EuclideanDistance': 0.5716679691407486},\n",
       " {'datasetA': '4784374253178847233',\n",
       "  'datasetB': '4785099261442260993',\n",
       "  'EuclideanDistance': 0.6403564518352997},\n",
       " {'datasetA': '4784374257494786049',\n",
       "  'datasetB': '4785099262461476865',\n",
       "  'EuclideanDistance': 0.5896252007702999},\n",
       " {'datasetA': '4784374256827891713',\n",
       "  'datasetB': '4785099262499225601',\n",
       "  'EuclideanDistance': 0.25759500125236595},\n",
       " {'datasetA': '4784374256827891713',\n",
       "  'datasetB': '4785099261433872385',\n",
       "  'EuclideanDistance': 0.540922162341214},\n",
       " {'datasetA': '4784714421232467969',\n",
       "  'datasetB': '4785099261442260993',\n",
       "  'EuclideanDistance': 0.522884916642432},\n",
       " {'datasetA': '4784374252897828865',\n",
       "  'datasetB': '4785099261341597697',\n",
       "  'EuclideanDistance': 0.6193220999346503},\n",
       " {'datasetA': '4784374252897828865',\n",
       "  'datasetB': '4785099261958160385',\n",
       "  'EuclideanDistance': 0.46199437944783783},\n",
       " {'datasetA': '4784714421366685697',\n",
       "  'datasetB': '4785099261186408449',\n",
       "  'EuclideanDistance': 0.9679151396733009},\n",
       " {'datasetA': '4784374257037606913',\n",
       "  'datasetB': '4785099261312237569',\n",
       "  'EuclideanDistance': 0.87534017475895},\n",
       " {'datasetA': '4784714421354102785',\n",
       "  'datasetB': '4785099262012686337',\n",
       "  'EuclideanDistance': 0.7245565429494601},\n",
       " {'datasetA': '4784714421953888257',\n",
       "  'datasetB': '4785099262167875585',\n",
       "  'EuclideanDistance': 1.7306505304816422},\n",
       " {'datasetA': '4784714421605761025',\n",
       "  'datasetB': '4785099262440505345',\n",
       "  'EuclideanDistance': 1.4736190285123838},\n",
       " {'datasetA': '4784714421832253441',\n",
       "  'datasetB': '4785099262193041409',\n",
       "  'EuclideanDistance': 0.7942712873988476},\n",
       " {'datasetA': '4784714421832253441',\n",
       "  'datasetB': '4785099262021074945',\n",
       "  'EuclideanDistance': 0.9029226746955971},\n",
       " {'datasetA': '4784714421832253441',\n",
       "  'datasetB': '4785099262281121793',\n",
       "  'EuclideanDistance': 0.2651131564897878},\n",
       " {'datasetA': '4784714421870002177',\n",
       "  'datasetB': '4785099262012686337',\n",
       "  'EuclideanDistance': 0.3305512903849751},\n",
       " {'datasetA': '4784714421870002177',\n",
       "  'datasetB': '4785099262461476865',\n",
       "  'EuclideanDistance': 0.636438170450495},\n",
       " {'datasetA': '4784374256773365761',\n",
       "  'datasetB': '4785099261291266049',\n",
       "  'EuclideanDistance': 0.44670470164240705},\n",
       " {'datasetA': '4784374253610860545',\n",
       "  'datasetB': '4785099261085745153',\n",
       "  'EuclideanDistance': 0.7466950065423268},\n",
       " {'datasetA': '4784374253883490305',\n",
       "  'datasetB': '4785099261853302785',\n",
       "  'EuclideanDistance': 0.5686509487808006},\n",
       " {'datasetA': '4784374254781071361',\n",
       "  'datasetB': '4785099261798776833',\n",
       "  'EuclideanDistance': 0.48517766355516545},\n",
       " {'datasetA': '4784714422130049025',\n",
       "  'datasetB': '4785099261878468609',\n",
       "  'EuclideanDistance': 0.8230116473602426},\n",
       " {'datasetA': '4784714421626732545',\n",
       "  'datasetB': '4785099261798776833',\n",
       "  'EuclideanDistance': 0.41385207559581544},\n",
       " {'datasetA': '4784714421626732545',\n",
       "  'datasetB': '4785099262641831937',\n",
       "  'EuclideanDistance': 0.4353953065424594},\n",
       " {'datasetA': '4784714419378585601',\n",
       "  'datasetB': '4785099262050435073',\n",
       "  'EuclideanDistance': 0.3420603209785104},\n",
       " {'datasetA': '4784374256584622081',\n",
       "  'datasetB': '4785099261681336321',\n",
       "  'EuclideanDistance': 0.3622699977134852},\n",
       " {'datasetA': '4784374254814625793',\n",
       "  'datasetB': '4785099262507614209',\n",
       "  'EuclideanDistance': 0.7670891121208572},\n",
       " {'datasetA': '4784374254814625793',\n",
       "  'datasetB': '4785099261425483777',\n",
       "  'EuclideanDistance': 0.9586603714998913},\n",
       " {'datasetA': '4784714419760267265',\n",
       "  'datasetB': '4785099261878468609',\n",
       "  'EuclideanDistance': 1.2097184880179472},\n",
       " {'datasetA': '4784714418963349505',\n",
       "  'datasetB': '4785099261756833793',\n",
       "  'EuclideanDistance': 0.7306623307996963},\n",
       " {'datasetA': '4784374253669580801',\n",
       "  'datasetB': '4785099262318870529',\n",
       "  'EuclideanDistance': 0.7711630039970296},\n",
       " {'datasetA': '4784714421169553409',\n",
       "  'datasetB': '4785099261790388225',\n",
       "  'EuclideanDistance': 0.4528035827984083},\n",
       " {'datasetA': '4784374251081695233',\n",
       "  'datasetB': '4785099262625054721',\n",
       "  'EuclideanDistance': 1.2461154061888093},\n",
       " {'datasetA': '4784714419026264065',\n",
       "  'datasetB': '4785099261672947713',\n",
       "  'EuclideanDistance': 0.3794257687609426},\n",
       " {'datasetA': '4784374251182358529',\n",
       "  'datasetB': '4785099262419533825',\n",
       "  'EuclideanDistance': 1.143806006893683},\n",
       " {'datasetA': '4784374251626954753',\n",
       "  'datasetB': '4785099261781999617',\n",
       "  'EuclideanDistance': 1.558270657221666},\n",
       " {'datasetA': '4784374251626954753',\n",
       "  'datasetB': '4785099261807165441',\n",
       "  'EuclideanDistance': 0.9246223573031773},\n",
       " {'datasetA': '4784374256932749313',\n",
       "  'datasetB': '4785099261157048321',\n",
       "  'EuclideanDistance': 0.6429133052405033},\n",
       " {'datasetA': '4784714421412823041',\n",
       "  'datasetB': '4785099262679580673',\n",
       "  'EuclideanDistance': 0.32178323735680214},\n",
       " {'datasetA': '4784374255158558721',\n",
       "  'datasetB': '4785099261106716673',\n",
       "  'EuclideanDistance': 0.7019610170656867},\n",
       " {'datasetA': '4784714422427844609',\n",
       "  'datasetB': '4785099262469865473',\n",
       "  'EuclideanDistance': 0.45603501564711624},\n",
       " {'datasetA': '4784714422427844609',\n",
       "  'datasetB': '4785099262625054721',\n",
       "  'EuclideanDistance': 1.0528479937356434},\n",
       " {'datasetA': '4784714422427844609',\n",
       "  'datasetB': '4785099261106716673',\n",
       "  'EuclideanDistance': 0.46671588945576375},\n",
       " {'datasetA': '4784374257461231617',\n",
       "  'datasetB': '4785099262595694593',\n",
       "  'EuclideanDistance': 0.42393203952672487},\n",
       " {'datasetA': '4784374257461231617',\n",
       "  'datasetB': '4785099262469865473',\n",
       "  'EuclideanDistance': 0.37109945227923435},\n",
       " {'datasetA': '4784714422142631937',\n",
       "  'datasetB': '4785099262369202177',\n",
       "  'EuclideanDistance': 0.3687461117831303},\n",
       " {'datasetA': '4784714422142631937',\n",
       "  'datasetB': '4785099262641831937',\n",
       "  'EuclideanDistance': 0.43029846613128026},\n",
       " {'datasetA': '4784714418971738113',\n",
       "  'datasetB': '4785099262113349633',\n",
       "  'EuclideanDistance': 1.9059104751957279},\n",
       " {'datasetA': '4784714418971738113',\n",
       "  'datasetB': '4785099261219962881',\n",
       "  'EuclideanDistance': 1.6558222435121024},\n",
       " {'datasetA': '4784374256408461313',\n",
       "  'datasetB': '4785099261228351489',\n",
       "  'EuclideanDistance': 0.5952720302894856},\n",
       " {'datasetA': '4784714419651215361',\n",
       "  'datasetB': '4785099261375152129',\n",
       "  'EuclideanDistance': 0.8200767833015592},\n",
       " {'datasetA': '4784714422012608513',\n",
       "  'datasetB': '4785099261299654657',\n",
       "  'EuclideanDistance': 0.3524556604612654},\n",
       " {'datasetA': '4784714422268461057',\n",
       "  'datasetB': '4785099261404512257',\n",
       "  'EuclideanDistance': 0.4321000640202078},\n",
       " {'datasetA': '4784714422381707265',\n",
       "  'datasetB': '4785099262239178753',\n",
       "  'EuclideanDistance': 0.5892449626125564},\n",
       " {'datasetA': '4784714422381707265',\n",
       "  'datasetB': '4785099261823942657',\n",
       "  'EuclideanDistance': 0.8576113218857759},\n",
       " {'datasetA': '4784374255611543553',\n",
       "  'datasetB': '4785099261576478721',\n",
       "  'EuclideanDistance': 0.8458400319659727},\n",
       " {'datasetA': '4784714422494953473',\n",
       "  'datasetB': '4785099261186408449',\n",
       "  'EuclideanDistance': 0.4066119370875214},\n",
       " {'datasetA': '4784714421119221761',\n",
       "  'datasetB': '4785099261823942657',\n",
       "  'EuclideanDistance': 1.1349120653459028},\n",
       " {'datasetA': '4784374250825842689',\n",
       "  'datasetB': '4785099261299654657',\n",
       "  'EuclideanDistance': 0.392653734113228},\n",
       " {'datasetA': '4784374254495858689',\n",
       "  'datasetB': '4785099261404512257',\n",
       "  'EuclideanDistance': 0.8409842797329653},\n",
       " {'datasetA': '4784374254705573889',\n",
       "  'datasetB': '4785099261341597697',\n",
       "  'EuclideanDistance': 0.36295187173512494},\n",
       " {'datasetA': '4784714422574645249',\n",
       "  'datasetB': '4785099261563895809',\n",
       "  'EuclideanDistance': 0.5853866041488398},\n",
       " {'datasetA': '4784374250892951553',\n",
       "  'datasetB': '4785099261983326209',\n",
       "  'EuclideanDistance': 0.7032511284883802},\n",
       " {'datasetA': '4784374250892951553',\n",
       "  'datasetB': '4785099262419533825',\n",
       "  'EuclideanDistance': 0.5258025629974385},\n",
       " {'datasetA': '4784374257431871489',\n",
       "  'datasetB': '4785099262184652801',\n",
       "  'EuclideanDistance': 0.34722292075453426},\n",
       " {'datasetA': '4784374256324575233',\n",
       "  'datasetB': '4785099261958160385',\n",
       "  'EuclideanDistance': 1.1036790198827608},\n",
       " {'datasetA': '4784714419361808385',\n",
       "  'datasetB': '4785099261823942657',\n",
       "  'EuclideanDistance': 1.2785285147733023},\n",
       " {'datasetA': '4784374258086182913',\n",
       "  'datasetB': '4785099261396123649',\n",
       "  'EuclideanDistance': 1.1623098618478045},\n",
       " {'datasetA': '4784374253245956097',\n",
       "  'datasetB': '4785099261878468609',\n",
       "  'EuclideanDistance': 1.237433760713243},\n",
       " {'datasetA': '4784374253115932673',\n",
       "  'datasetB': '4785099261563895809',\n",
       "  'EuclideanDistance': 0.4098845533353673},\n",
       " {'datasetA': '4784374253971570689',\n",
       "  'datasetB': '4785099261853302785',\n",
       "  'EuclideanDistance': 0.6095910681944643},\n",
       " {'datasetA': '4784374251245273089',\n",
       "  'datasetB': '4785099262184652801',\n",
       "  'EuclideanDistance': 1.3414917115533114},\n",
       " {'datasetA': '4784374252990103553',\n",
       "  'datasetB': '4785099262239178753',\n",
       "  'EuclideanDistance': 0.7076576896266968},\n",
       " {'datasetA': '4784374254244200449',\n",
       "  'datasetB': '4785099262184652801',\n",
       "  'EuclideanDistance': 0.33783135542930093},\n",
       " {'datasetA': '4784374258212012033',\n",
       "  'datasetB': '4785099262499225601',\n",
       "  'EuclideanDistance': 0.7999527137015978},\n",
       " {'datasetA': '4784714422213935105',\n",
       "  'datasetB': '4785099261366763521',\n",
       "  'EuclideanDistance': 0.8456287561017527},\n",
       " {'datasetA': '4784374254432944129',\n",
       "  'datasetB': '4785099261106716673',\n",
       "  'EuclideanDistance': 0.5234606862497239},\n",
       " {'datasetA': '4784374255431188481',\n",
       "  'datasetB': '4785099261228351489',\n",
       "  'EuclideanDistance': 0.6215674022819839},\n",
       " {'datasetA': '4784374253598277633',\n",
       "  'datasetB': '4785099261240934401',\n",
       "  'EuclideanDistance': 1.1735391998348224},\n",
       " {'datasetA': '4784714419588300801',\n",
       "  'datasetB': '4785099261228351489',\n",
       "  'EuclideanDistance': 0.4228086222598962},\n",
       " {'datasetA': '4784374252788776961',\n",
       "  'datasetB': '4785099261333209089',\n",
       "  'EuclideanDistance': 1.195420227350188},\n",
       " {'datasetA': '4784374257117298689',\n",
       "  'datasetB': '4785099261366763521',\n",
       "  'EuclideanDistance': 1.327491973784107},\n",
       " {'datasetA': '4784374251111055361',\n",
       "  'datasetB': '4785099262184652801',\n",
       "  'EuclideanDistance': 0.4274366478222591},\n",
       " {'datasetA': '4784374257360568321',\n",
       "  'datasetB': '4785099261299654657',\n",
       "  'EuclideanDistance': 0.8873038068893148},\n",
       " {'datasetA': '4784714421412823041',\n",
       "  'datasetB': '4785099261404512257',\n",
       "  'EuclideanDistance': 0.3410054947069815},\n",
       " {'datasetA': '4784374255242444801',\n",
       "  'datasetB': '4785099262130126849',\n",
       "  'EuclideanDistance': 0.47808957486376324},\n",
       " {'datasetA': '4784374257050189825',\n",
       "  'datasetB': '4785099261509369857',\n",
       "  'EuclideanDistance': 0.5187640754919682},\n",
       " {'datasetA': '4784374254856568833',\n",
       "  'datasetB': '4785099261299654657',\n",
       "  'EuclideanDistance': 0.35379071188862005},\n",
       " {'datasetA': '4784374254080622593',\n",
       "  'datasetB': '4785099261815554049',\n",
       "  'EuclideanDistance': 0.5992503510948397},\n",
       " {'datasetA': '4784374254999175169',\n",
       "  'datasetB': '4785099261551312897',\n",
       "  'EuclideanDistance': 0.41065405972428437},\n",
       " {'datasetA': '4784714421505097729',\n",
       "  'datasetB': '4785099261320626177',\n",
       "  'EuclideanDistance': 0.6659193041282045},\n",
       " {'datasetA': '4784374255250833409',\n",
       "  'datasetB': '4785099262432116737',\n",
       "  'EuclideanDistance': 0.44589626129665366},\n",
       " {'datasetA': '4784374253317259265',\n",
       "  'datasetB': '4785099262541168641',\n",
       "  'EuclideanDistance': 1.2863230418691007},\n",
       " {'datasetA': '4784374253317259265',\n",
       "  'datasetB': '4785099262469865473',\n",
       "  'EuclideanDistance': 0.7233166669265313},\n",
       " {'datasetA': '4784374255791898625',\n",
       "  'datasetB': '4785099261631004673',\n",
       "  'EuclideanDistance': 0.4185398971483888},\n",
       " {'datasetA': '4784374255234056193',\n",
       "  'datasetB': '4785099261631004673',\n",
       "  'EuclideanDistance': 0.46830679335918357},\n",
       " {'datasetA': '4784374251496931329',\n",
       "  'datasetB': '4785099261509369857',\n",
       "  'EuclideanDistance': 0.9961099658710096},\n",
       " {'datasetA': '4784374257062772737',\n",
       "  'datasetB': '4785099261551312897',\n",
       "  'EuclideanDistance': 0.4083093650966157},\n",
       " {'datasetA': '4784374256920166401',\n",
       "  'datasetB': '4785099261148659713',\n",
       "  'EuclideanDistance': 0.5786286122961846},\n",
       " {'datasetA': '4784374253355008001',\n",
       "  'datasetB': '4785099261157048321',\n",
       "  'EuclideanDistance': 0.690520010896011},\n",
       " {'datasetA': '4784374254445527041',\n",
       "  'datasetB': '4785099262696357889',\n",
       "  'EuclideanDistance': 0.37139120988794594},\n",
       " {'datasetA': '4784374255078866945',\n",
       "  'datasetB': '4785099262587305985',\n",
       "  'EuclideanDistance': 0.3382296584919712},\n",
       " {'datasetA': '4784714419303088129',\n",
       "  'datasetB': '4785099261404512257',\n",
       "  'EuclideanDistance': 0.5472254628704234},\n",
       " {'datasetA': '4784374250708402177',\n",
       "  'datasetB': '4785099261488398337',\n",
       "  'EuclideanDistance': 1.0026677484024473},\n",
       " {'datasetA': '4784374254391001089',\n",
       "  'datasetB': '4785099262385979393',\n",
       "  'EuclideanDistance': 0.8037032141719875},\n",
       " {'datasetA': '4784374254080622593',\n",
       "  'datasetB': '4785099261249323009',\n",
       "  'EuclideanDistance': 0.4882454047287712},\n",
       " {'datasetA': '4784374255250833409',\n",
       "  'datasetB': '4785099262671192065',\n",
       "  'EuclideanDistance': 0.5747697321222404},\n",
       " {'datasetA': '4784714419605078017',\n",
       "  'datasetB': '4785099261249323009',\n",
       "  'EuclideanDistance': 0.3562240429221027},\n",
       " {'datasetA': '4784714421161164801',\n",
       "  'datasetB': '4785099262184652801',\n",
       "  'EuclideanDistance': 0.8272413692565246},\n",
       " {'datasetA': '4784374255519268865',\n",
       "  'datasetB': '4785099261366763521',\n",
       "  'EuclideanDistance': 1.0350646212020935},\n",
       " {'datasetA': '4784374257196990465',\n",
       "  'datasetB': '4785099262578917377',\n",
       "  'EuclideanDistance': 0.8117334473482647},\n",
       " {'datasetA': '4784374257196990465',\n",
       "  'datasetB': '4785099261593255937',\n",
       "  'EuclideanDistance': 0.5256450236897813},\n",
       " {'datasetA': '4784374255045312513',\n",
       "  'datasetB': '4785099262641831937',\n",
       "  'EuclideanDistance': 0.7341467100741146},\n",
       " {'datasetA': '4784374250796482561',\n",
       "  'datasetB': '4785099261270294529',\n",
       "  'EuclideanDistance': 0.4160915181212478},\n",
       " {'datasetA': '4784374257012441089',\n",
       "  'datasetB': '4785099261270294529',\n",
       "  'EuclideanDistance': 1.0236109918733551},\n",
       " {'datasetA': '4784374255171141633',\n",
       "  'datasetB': '4785099261899440129',\n",
       "  'EuclideanDistance': 0.4171909593200107},\n",
       " {'datasetA': '4784374255171141633',\n",
       "  'datasetB': '4785099262302093313',\n",
       "  'EuclideanDistance': 0.6356416246308171},\n",
       " {'datasetA': '4784374255171141633',\n",
       "  'datasetB': '4785099262461476865',\n",
       "  'EuclideanDistance': 0.934356951541504},\n",
       " {'datasetA': '4784374253329842177',\n",
       "  'datasetB': '4785099261115105281',\n",
       "  'EuclideanDistance': 1.494683700883842},\n",
       " {'datasetA': '4784374253329842177',\n",
       "  'datasetB': '4785099261207379969',\n",
       "  'EuclideanDistance': 1.5149430533111587},\n",
       " {'datasetA': '4784374258270732289',\n",
       "  'datasetB': '4785099261270294529',\n",
       "  'EuclideanDistance': 0.7385849142682599},\n",
       " {'datasetA': '4784374253191430145',\n",
       "  'datasetB': '4785099262310481921',\n",
       "  'EuclideanDistance': 0.6213358885239653},\n",
       " {'datasetA': '4784714419416334337',\n",
       "  'datasetB': '4785099261941383169',\n",
       "  'EuclideanDistance': 0.3720336861870097},\n",
       " {'datasetA': '4784714421840642049',\n",
       "  'datasetB': '4785099262255955969',\n",
       "  'EuclideanDistance': 1.1884886474788827},\n",
       " {'datasetA': '4784374253464059905',\n",
       "  'datasetB': '4785099262167875585',\n",
       "  'EuclideanDistance': 1.0593122525909122},\n",
       " {'datasetA': '4784374255653486593',\n",
       "  'datasetB': '4785099261983326209',\n",
       "  'EuclideanDistance': 0.9860087677138448},\n",
       " {'datasetA': '4784374256043556865',\n",
       "  'datasetB': '4785099262255955969',\n",
       "  'EuclideanDistance': 1.2049297242106127},\n",
       " {'datasetA': '4784374255808675841',\n",
       "  'datasetB': '4785099261270294529',\n",
       "  'EuclideanDistance': 1.008313463756182},\n",
       " {'datasetA': '4784374258098765825',\n",
       "  'datasetB': '4785099262633443329',\n",
       "  'EuclideanDistance': 1.2034071636402863},\n",
       " {'datasetA': '4784374258098765825',\n",
       "  'datasetB': '4785099262377590785',\n",
       "  'EuclideanDistance': 0.45256172641811276},\n",
       " {'datasetA': '4784374258098765825',\n",
       "  'datasetB': '4785099262419533825',\n",
       "  'EuclideanDistance': 0.35212060241139437},\n",
       " {'datasetA': '4784374253237567489',\n",
       "  'datasetB': '4785099261433872385',\n",
       "  'EuclideanDistance': 0.6965345698545755},\n",
       " {'datasetA': '4784374253245956097',\n",
       "  'datasetB': '4785099262121738241',\n",
       "  'EuclideanDistance': 1.0378117010127943},\n",
       " {'datasetA': '4784714421521874945',\n",
       "  'datasetB': '4785099262369202177',\n",
       "  'EuclideanDistance': 1.0987931199154903},\n",
       " {'datasetA': '4784374257746444289',\n",
       "  'datasetB': '4785099261966548993',\n",
       "  'EuclideanDistance': 0.6993726224079521},\n",
       " {'datasetA': '4784714419844153345',\n",
       "  'datasetB': '4785099261375152129',\n",
       "  'EuclideanDistance': 0.32608912712802535},\n",
       " {'datasetA': '4784374254290337793',\n",
       "  'datasetB': '4785099262486642689',\n",
       "  'EuclideanDistance': 0.5426980464337927},\n",
       " {'datasetA': '4784714422163603457',\n",
       "  'datasetB': '4785099261073162241',\n",
       "  'EuclideanDistance': 0.7397190602892879},\n",
       " {'datasetA': '4784374257738055681',\n",
       "  'datasetB': '4785099261031219201',\n",
       "  'EuclideanDistance': 0.5672246959465687},\n",
       " {'datasetA': '4784714422184574977',\n",
       "  'datasetB': '4785099261836525569',\n",
       "  'EuclideanDistance': 0.76389963346379},\n",
       " {'datasetA': '4784374253430505473',\n",
       "  'datasetB': '4785099262704746497',\n",
       "  'EuclideanDistance': 0.40639353742803436},\n",
       " {'datasetA': '4784374254202257409',\n",
       "  'datasetB': '4785099261261905921',\n",
       "  'EuclideanDistance': 0.741586743437903},\n",
       " {'datasetA': '4784374258212012033',\n",
       "  'datasetB': '4785099261941383169',\n",
       "  'EuclideanDistance': 0.5832378593608215},\n",
       " {'datasetA': '4784374253942210561',\n",
       "  'datasetB': '4785099262704746497',\n",
       "  'EuclideanDistance': 0.7437200839030904},\n",
       " {'datasetA': '4784714422213935105',\n",
       "  'datasetB': '4785099262176264193',\n",
       "  'EuclideanDistance': 1.1976534938687402},\n",
       " {'datasetA': '4784374256840474625',\n",
       "  'datasetB': '4785099262255955969',\n",
       "  'EuclideanDistance': 1.5070067816120816},\n",
       " {'datasetA': '4784374253044629505',\n",
       "  'datasetB': '4785099262239178753',\n",
       "  'EuclideanDistance': 1.1249754547964679},\n",
       " {'datasetA': '4784374257595449345',\n",
       "  'datasetB': '4785099261375152129',\n",
       "  'EuclideanDistance': 0.6522508642846377},\n",
       " {'datasetA': '4784714421735784449',\n",
       "  'datasetB': '4785099262255955969',\n",
       "  'EuclideanDistance': 0.9292989977937375},\n",
       " {'datasetA': '4784714421735784449',\n",
       "  'datasetB': '4785099262704746497',\n",
       "  'EuclideanDistance': 0.5573867652694148},\n",
       " {'datasetA': '4784374257222156289',\n",
       "  'datasetB': '4785099261094133761',\n",
       "  'EuclideanDistance': 0.9099175494889743},\n",
       " {'datasetA': '4784374257222156289',\n",
       "  'datasetB': '4785099261714890753',\n",
       "  'EuclideanDistance': 0.8229464851074407},\n",
       " {'datasetA': '4784374257222156289',\n",
       "  'datasetB': '4785099261354180609',\n",
       "  'EuclideanDistance': 0.582384240311262},\n",
       " {'datasetA': '4784374256676896769',\n",
       "  'datasetB': '4785099261291266049',\n",
       "  'EuclideanDistance': 0.5816984474308361},\n",
       " {'datasetA': '4784374254592327681',\n",
       "  'datasetB': '4785099262310481921',\n",
       "  'EuclideanDistance': 0.6490224627238467},\n",
       " {'datasetA': '4784374255263416321',\n",
       "  'datasetB': '4785099261538729985',\n",
       "  'EuclideanDistance': 0.884895066896794},\n",
       " {'datasetA': '4784714422364930049',\n",
       "  'datasetB': '4785099261605838849',\n",
       "  'EuclideanDistance': 0.7321545038855607},\n",
       " {'datasetA': '4784714422364930049',\n",
       "  'datasetB': '4785099262176264193',\n",
       "  'EuclideanDistance': 0.9108697960058842},\n",
       " {'datasetA': '4784374253858324481',\n",
       "  'datasetB': '4785099262520197121',\n",
       "  'EuclideanDistance': 1.3244862482272706},\n",
       " {'datasetA': '4784374251454988289',\n",
       "  'datasetB': '4785099262184652801',\n",
       "  'EuclideanDistance': 1.0589324521064811},\n",
       " {'datasetA': '4784374258031656961',\n",
       "  'datasetB': '4785099261375152129',\n",
       "  'EuclideanDistance': 1.0360167281213633},\n",
       " {'datasetA': '4784714422230712321',\n",
       "  'datasetB': '4785099262255955969',\n",
       "  'EuclideanDistance': 1.4970474522248747},\n",
       " {'datasetA': '4784714422553673729',\n",
       "  'datasetB': '4785099262704746497',\n",
       "  'EuclideanDistance': 1.1864926548455867},\n",
       " {'datasetA': '4784714419084984321',\n",
       "  'datasetB': '4785099261563895809',\n",
       "  'EuclideanDistance': 0.7906202029952336},\n",
       " {'datasetA': '4784714419403751425',\n",
       "  'datasetB': '4785099262155292673',\n",
       "  'EuclideanDistance': 0.5295222110949813},\n",
       " {'datasetA': '4784374251048140801',\n",
       "  'datasetB': '4785099261983326209',\n",
       "  'EuclideanDistance': 1.540377431796423},\n",
       " {'datasetA': '4784714419001098241',\n",
       "  'datasetB': '4785099261354180609',\n",
       "  'EuclideanDistance': 0.5135264259661412},\n",
       " {'datasetA': '4784374250993614849',\n",
       "  'datasetB': '4785099261798776833',\n",
       "  'EuclideanDistance': 0.48427229635376645},\n",
       " {'datasetA': '4784714421870002177',\n",
       "  'datasetB': '4785099261366763521',\n",
       "  'EuclideanDistance': 0.9255562593831684},\n",
       " {'datasetA': '4784714422012608513',\n",
       "  'datasetB': '4785099262578917377',\n",
       "  'EuclideanDistance': 0.5944839554988609},\n",
       " {'datasetA': '4784374254411972609',\n",
       "  'datasetB': '4785099261949771777',\n",
       "  'EuclideanDistance': 0.5331084458393537},\n",
       " {'datasetA': '4784374253740883969',\n",
       "  'datasetB': '4785099262193041409',\n",
       "  'EuclideanDistance': 1.0203250001663136},\n",
       " {'datasetA': '4784714419777044481',\n",
       "  'datasetB': '4785099262193041409',\n",
       "  'EuclideanDistance': 1.1503376740007383},\n",
       " {'datasetA': '4784374255640903681',\n",
       "  'datasetB': '4785099262239178753',\n",
       "  'EuclideanDistance': 0.8196790484215847},\n",
       " {'datasetA': '4784714419416334337',\n",
       "  'datasetB': '4785099261995909121',\n",
       "  'EuclideanDistance': 0.6335044038045381},\n",
       " {'datasetA': '4784374255741566977',\n",
       "  'datasetB': '4785099262012686337',\n",
       "  'EuclideanDistance': 0.6877211273633111},\n",
       " {'datasetA': '4784374255741566977',\n",
       "  'datasetB': '4785099261538729985',\n",
       "  'EuclideanDistance': 0.5710986568879814},\n",
       " {'datasetA': '4784714421962276865',\n",
       "  'datasetB': '4785099261949771777',\n",
       "  'EuclideanDistance': 0.4559103932143926},\n",
       " {'datasetA': '4784374250892951553',\n",
       "  'datasetB': '4785099261974937601',\n",
       "  'EuclideanDistance': 0.7144781233121017},\n",
       " {'datasetA': '4784374253170458625',\n",
       "  'datasetB': '4785099262193041409',\n",
       "  'EuclideanDistance': 1.1220356198566173},\n",
       " {'datasetA': '4784374256907583489',\n",
       "  'datasetB': '4785099262239178753',\n",
       "  'EuclideanDistance': 0.739018490420721},\n",
       " {'datasetA': '4784374258098765825',\n",
       "  'datasetB': '4785099261949771777',\n",
       "  'EuclideanDistance': 0.3639905257846804},\n",
       " {'datasetA': '4784374253237567489',\n",
       "  'datasetB': '4785099262616666113',\n",
       "  'EuclideanDistance': 0.4572150543829956},\n",
       " {'datasetA': '4784374255112421377',\n",
       "  'datasetB': '4785099262255955969',\n",
       "  'EuclideanDistance': 0.907539517604816},\n",
       " {'datasetA': '4784714422184574977',\n",
       "  'datasetB': '4785099261157048321',\n",
       "  'EuclideanDistance': 0.7104484025587705},\n",
       " {'datasetA': '4784374251664703489',\n",
       "  'datasetB': '4785099262310481921',\n",
       "  'EuclideanDistance': 1.601212311675961},\n",
       " {'datasetA': '4784374256735617025',\n",
       "  'datasetB': '4785099261404512257',\n",
       "  'EuclideanDistance': 0.8043733843181861},\n",
       " {'datasetA': '4784714419160481793',\n",
       "  'datasetB': '4785099261442260993',\n",
       "  'EuclideanDistance': 0.7582875934988793},\n",
       " {'datasetA': '4784374256270049281',\n",
       "  'datasetB': '4785099261312237569',\n",
       "  'EuclideanDistance': 0.6890629621951884},\n",
       " {'datasetA': '4784374256270049281',\n",
       "  'datasetB': '4785099262440505345',\n",
       "  'EuclideanDistance': 1.5541785957887557},\n",
       " {'datasetA': '4784374254122565633',\n",
       "  'datasetB': '4785099262104961025',\n",
       "  'EuclideanDistance': 0.21626234803997071},\n",
       " {'datasetA': '4784374256920166401',\n",
       "  'datasetB': '4785099262138515457',\n",
       "  'EuclideanDistance': 0.5006904150452526},\n",
       " {'datasetA': '4784374256920166401',\n",
       "  'datasetB': '4785099261983326209',\n",
       "  'EuclideanDistance': 0.7599822878128392},\n",
       " {'datasetA': '4784714422151020545',\n",
       "  'datasetB': '4785099262193041409',\n",
       "  'EuclideanDistance': 0.8944328557769262},\n",
       " {'datasetA': '4784714421530263553',\n",
       "  'datasetB': '4785099262704746497',\n",
       "  'EuclideanDistance': 0.8528851298043022},\n",
       " {'datasetA': '4784374256299409409',\n",
       "  'datasetB': '4785099261157048321',\n",
       "  'EuclideanDistance': 0.5065565881688705},\n",
       " {'datasetA': '4784374253782827009',\n",
       "  'datasetB': '4785099261454843905',\n",
       "  'EuclideanDistance': 0.5985085331856946},\n",
       " {'datasetA': '4784374253464059905',\n",
       "  'datasetB': '4785099262432116737',\n",
       "  'EuclideanDistance': 0.7206511794193685},\n",
       " {'datasetA': '4784374251329159169',\n",
       "  'datasetB': '4785099262587305985',\n",
       "  'EuclideanDistance': 0.4582169480157397},\n",
       " {'datasetA': '4784374254504247297',\n",
       "  'datasetB': '4785099261509369857',\n",
       "  'EuclideanDistance': 0.569697426223592},\n",
       " {'datasetA': '4784374255955476481',\n",
       "  'datasetB': '4785099261291266049',\n",
       "  'EuclideanDistance': 0.9619699494916956},\n",
       " {'datasetA': '4784714421148581889',\n",
       "  'datasetB': '4785099261631004673',\n",
       "  'EuclideanDistance': 0.3266915254146945},\n",
       " {'datasetA': '4784374255452160001',\n",
       "  'datasetB': '4785099261148659713',\n",
       "  'EuclideanDistance': 0.8047410891981289},\n",
       " {'datasetA': '4784374255452160001',\n",
       "  'datasetB': '4785099262021074945',\n",
       "  'EuclideanDistance': 1.1320030494717177},\n",
       " {'datasetA': '4784374253212401665',\n",
       "  'datasetB': '4785099261916217345',\n",
       "  'EuclideanDistance': 0.47682422676562525},\n",
       " {'datasetA': '4784374253212401665',\n",
       "  'datasetB': '4785099261891051521',\n",
       "  'EuclideanDistance': 1.140625609093491},\n",
       " {'datasetA': '4784374254877540353',\n",
       "  'datasetB': '4785099261551312897',\n",
       "  'EuclideanDistance': 0.6589340574732009},\n",
       " {'datasetA': '4784374257297653761',\n",
       "  'datasetB': '4785099261689724929',\n",
       "  'EuclideanDistance': 0.7157614323792779},\n",
       " {'datasetA': '4784374254562967553',\n",
       "  'datasetB': '4785099261689724929',\n",
       "  'EuclideanDistance': 0.7036405030567668},\n",
       " {'datasetA': '4784374257503174657',\n",
       "  'datasetB': '4785099262209818625',\n",
       "  'EuclideanDistance': 0.48988040730662386},\n",
       " {'datasetA': '4784714421861613569',\n",
       "  'datasetB': '4785099261320626177',\n",
       "  'EuclideanDistance': 1.0529831472581566},\n",
       " {'datasetA': '4784374254932066305',\n",
       "  'datasetB': '4785099262432116737',\n",
       "  'EuclideanDistance': 0.8738558949319458},\n",
       " {'datasetA': '4784374257222156289',\n",
       "  'datasetB': '4785099261488398337',\n",
       "  'EuclideanDistance': 1.0354005250307798},\n",
       " {'datasetA': '4784714421643509761',\n",
       "  'datasetB': '4785099261521952769',\n",
       "  'EuclideanDistance': 0.6651931215925145},\n",
       " {'datasetA': '4784374253531168769',\n",
       "  'datasetB': '4785099261454843905',\n",
       "  'EuclideanDistance': 0.6866650365762825},\n",
       " {'datasetA': '4784374253761855489',\n",
       "  'datasetB': '4785099261064773633',\n",
       "  'EuclideanDistance': 1.0874192091691022},\n",
       " {'datasetA': '4784374255687041025',\n",
       "  'datasetB': '4785099261551312897',\n",
       "  'EuclideanDistance': 0.9421178754886164},\n",
       " {'datasetA': '4784714421832253441',\n",
       "  'datasetB': '4785099261521952769',\n",
       "  'EuclideanDistance': 0.38725090563839104},\n",
       " {'datasetA': '4784714419026264065',\n",
       "  'datasetB': '4785099262587305985',\n",
       "  'EuclideanDistance': 0.7810301497958667},\n",
       " {'datasetA': '4784714422524313601',\n",
       "  'datasetB': '4785099262033657857',\n",
       "  'EuclideanDistance': 0.4585566383081614},\n",
       " {'datasetA': '4784374254651047937',\n",
       "  'datasetB': '4785099261488398337',\n",
       "  'EuclideanDistance': 0.9215926670710508},\n",
       " {'datasetA': '4784374255355691009',\n",
       "  'datasetB': '4785099261605838849',\n",
       "  'EuclideanDistance': 0.7806962876123771},\n",
       " {'datasetA': '4784374252839108609',\n",
       "  'datasetB': '4785099262138515457',\n",
       "  'EuclideanDistance': 0.5063599919821652},\n",
       " {'datasetA': '4784374252839108609',\n",
       "  'datasetB': '4785099261186408449',\n",
       "  'EuclideanDistance': 0.27052898635598693},\n",
       " {'datasetA': '4784714421161164801',\n",
       "  'datasetB': '4785099262549557249',\n",
       "  'EuclideanDistance': 0.6330757876884398},\n",
       " {'datasetA': '4784374257335402497',\n",
       "  'datasetB': '4785099261530341377',\n",
       "  'EuclideanDistance': 0.3285407672996997},\n",
       " {'datasetA': '4784374257087938561',\n",
       "  'datasetB': '4785099261941383169',\n",
       "  'EuclideanDistance': 0.8714071280356022},\n",
       " {'datasetA': '4784714421203107841',\n",
       "  'datasetB': '4785099262562140161',\n",
       "  'EuclideanDistance': 0.6530525014759871},\n",
       " {'datasetA': '4784374255800287233',\n",
       "  'datasetB': '4785099262318870529',\n",
       "  'EuclideanDistance': 0.7416800382969835},\n",
       " {'datasetA': '4784714421790310401',\n",
       "  'datasetB': '4785099262096572417',\n",
       "  'EuclideanDistance': 0.8229956395715832},\n",
       " {'datasetA': '4784374258258149377',\n",
       "  'datasetB': '4785099262507614209',\n",
       "  'EuclideanDistance': 0.46537392934693955},\n",
       " {'datasetA': '4784374258258149377',\n",
       "  'datasetB': '4785099261031219201',\n",
       "  'EuclideanDistance': 0.4124583259517117},\n",
       " {'datasetA': '4784374258258149377',\n",
       "  'datasetB': '4785099262440505345',\n",
       "  'EuclideanDistance': 1.375569912334547},\n",
       " {'datasetA': '4784374255724789761',\n",
       "  'datasetB': '4785099262448893953',\n",
       "  'EuclideanDistance': 0.31121455634844986},\n",
       " {'datasetA': '4784374255724789761',\n",
       "  'datasetB': '4785099262222401537',\n",
       "  'EuclideanDistance': 0.6345582303106024},\n",
       " {'datasetA': '4784374255724789761',\n",
       "  'datasetB': '4785099262633443329',\n",
       "  'EuclideanDistance': 0.704400843110712},\n",
       " {'datasetA': '4784374250817454081',\n",
       "  'datasetB': '4785099262104961025',\n",
       "  'EuclideanDistance': 0.31156131239850476},\n",
       " {'datasetA': '4784374254453915649',\n",
       "  'datasetB': '4785099261907828737',\n",
       "  'EuclideanDistance': 0.390506914647205},\n",
       " {'datasetA': '4784714421945499649',\n",
       "  'datasetB': '4785099261660364801',\n",
       "  'EuclideanDistance': 1.141133688065826},\n",
       " {'datasetA': '4784374258178457601',\n",
       "  'datasetB': '4785099262578917377',\n",
       "  'EuclideanDistance': 0.5066309684699215},\n",
       " {'datasetA': '4784374258178457601',\n",
       "  'datasetB': '4785099262448893953',\n",
       "  'EuclideanDistance': 0.5877151761455818},\n",
       " {'datasetA': '4784374258178457601',\n",
       "  'datasetB': '4785099262608277505',\n",
       "  'EuclideanDistance': 1.1102784839984787},\n",
       " {'datasetA': '4784374254793654273',\n",
       "  'datasetB': '4785099262725718017',\n",
       "  'EuclideanDistance': 0.6074640261010419},\n",
       " {'datasetA': '4784374254793654273',\n",
       "  'datasetB': '4785099261433872385',\n",
       "  'EuclideanDistance': 0.7721149474377843},\n",
       " {'datasetA': '4784374258157486081',\n",
       "  'datasetB': '4785099262222401537',\n",
       "  'EuclideanDistance': 0.42332638324748456},\n",
       " {'datasetA': '4784714422171992065',\n",
       "  'datasetB': '4785099261031219201',\n",
       "  'EuclideanDistance': 0.6582971917626694},\n",
       " {'datasetA': '4784714419651215361',\n",
       "  'datasetB': '4785099261341597697',\n",
       "  'EuclideanDistance': 0.7999496209365078},\n",
       " {'datasetA': '4784714419286310913',\n",
       "  'datasetB': '4785099261341597697',\n",
       "  'EuclideanDistance': 0.48362719409938854},\n",
       " {'datasetA': '4784374254273560577',\n",
       "  'datasetB': '4785099262419533825',\n",
       "  'EuclideanDistance': 0.6134802796857936},\n",
       " {'datasetA': '4784374254273560577',\n",
       "  'datasetB': '4785099261530341377',\n",
       "  'EuclideanDistance': 0.2674762448220549},\n",
       " {'datasetA': '4784374256974692353',\n",
       "  'datasetB': '4785099262281121793',\n",
       "  'EuclideanDistance': 0.3815865011644229},\n",
       " {'datasetA': '4784374256974692353',\n",
       "  'datasetB': '4785099261622616065',\n",
       "  'EuclideanDistance': 0.5085267777056618},\n",
       " {'datasetA': '4784374256706256897',\n",
       "  'datasetB': '4785099261660364801',\n",
       "  'EuclideanDistance': 0.7858028792914755},\n",
       " {'datasetA': '4784374256706256897',\n",
       "  'datasetB': '4785099261073162241',\n",
       "  'EuclideanDistance': 0.6147355172154684},\n",
       " {'datasetA': '4784714422020997121',\n",
       "  'datasetB': '4785099261605838849',\n",
       "  'EuclideanDistance': 0.6855943099004068},\n",
       " {'datasetA': '4784714422020997121',\n",
       "  'datasetB': '4785099261500981249',\n",
       "  'EuclideanDistance': 0.5664221040638374},\n",
       " {'datasetA': '4784374255909339137',\n",
       "  'datasetB': '4785099261249323009',\n",
       "  'EuclideanDistance': 0.3620697449241253},\n",
       " {'datasetA': '4784714422469787649',\n",
       "  'datasetB': '4785099261836525569',\n",
       "  'EuclideanDistance': 0.8333071705726699},\n",
       " {'datasetA': '4784374254034485249',\n",
       "  'datasetB': '4785099262696357889',\n",
       "  'EuclideanDistance': 1.4038498245183555},\n",
       " {'datasetA': '4784374256819503105',\n",
       "  'datasetB': '4785099261672947713',\n",
       "  'EuclideanDistance': 0.646092736224506},\n",
       " {'datasetA': '4784374250758733825',\n",
       "  'datasetB': '4785099262687969281',\n",
       "  'EuclideanDistance': 0.2957128772933246},\n",
       " {'datasetA': '4784374251400462337',\n",
       "  'datasetB': '4785099262570528769',\n",
       "  'EuclideanDistance': 0.4837310055217701},\n",
       " {'datasetA': '4784374250934894593',\n",
       "  'datasetB': '4785099261198991361',\n",
       "  'EuclideanDistance': 0.7178137779689316},\n",
       " {'datasetA': '4784374250934894593',\n",
       "  'datasetB': '4785099261538729985',\n",
       "  'EuclideanDistance': 0.2574835954844222},\n",
       " {'datasetA': '4784374255204696065',\n",
       "  'datasetB': '4785099261073162241',\n",
       "  'EuclideanDistance': 0.4788008912470464},\n",
       " {'datasetA': '4784714421752561665',\n",
       "  'datasetB': '4785099261412900865',\n",
       "  'EuclideanDistance': 1.6887763682587051},\n",
       " {'datasetA': '4784714421765144577',\n",
       "  'datasetB': '4785099261844914177',\n",
       "  'EuclideanDistance': 0.395454067364019},\n",
       " {'datasetA': '4784714419835764737',\n",
       "  'datasetB': '4785099261249323009',\n",
       "  'EuclideanDistance': 0.2876395970048258},\n",
       " {'datasetA': '4784714419835764737',\n",
       "  'datasetB': '4785099261844914177',\n",
       "  'EuclideanDistance': 0.4271464091208323},\n",
       " {'datasetA': '4784714419835764737',\n",
       "  'datasetB': '4785099261672947713',\n",
       "  'EuclideanDistance': 0.5567393242769119},\n",
       " {'datasetA': '4784714419835764737',\n",
       "  'datasetB': '4785099262104961025',\n",
       "  'EuclideanDistance': 0.29144312293115854},\n",
       " {'datasetA': '4784714421421211649',\n",
       "  'datasetB': '4785099261354180609',\n",
       "  'EuclideanDistance': 0.46737927407119034},\n",
       " {'datasetA': '4784714421421211649',\n",
       "  'datasetB': '4785099261706502145',\n",
       "  'EuclideanDistance': 0.6976930504675808},\n",
       " {'datasetA': '4784374253443088385',\n",
       "  'datasetB': '4785099261043802113',\n",
       "  'EuclideanDistance': 0.5607514501446396},\n",
       " {'datasetA': '4784374251475959809',\n",
       "  'datasetB': '4785099262088183809',\n",
       "  'EuclideanDistance': 0.5127939412967043},\n",
       " {'datasetA': '4784714422381707265',\n",
       "  'datasetB': '4785099262302093313',\n",
       "  'EuclideanDistance': 0.72561086814154},\n",
       " {'datasetA': '4784374257683529729',\n",
       "  'datasetB': '4785099262272733185',\n",
       "  'EuclideanDistance': 0.8574641512467573},\n",
       " {'datasetA': '4784374258111348737',\n",
       "  'datasetB': '4785099261744250881',\n",
       "  'EuclideanDistance': 0.2627589137669543},\n",
       " {'datasetA': '4784714419546357761',\n",
       "  'datasetB': '4785099261312237569',\n",
       "  'EuclideanDistance': 0.8390643395007034},\n",
       " {'datasetA': '4784374250796482561',\n",
       "  'datasetB': '4785099261219962881',\n",
       "  'EuclideanDistance': 1.8741002227060242},\n",
       " {'datasetA': '4784374257842913281',\n",
       "  'datasetB': '4785099261480009729',\n",
       "  'EuclideanDistance': 1.0535193678502834},\n",
       " {'datasetA': '4784374257842913281',\n",
       "  'datasetB': '4785099261584867329',\n",
       "  'EuclideanDistance': 0.5463128642234275},\n",
       " {'datasetA': '4784374256416849921',\n",
       "  'datasetB': '4785099261106716673',\n",
       "  'EuclideanDistance': 0.4782022276507499},\n",
       " {'datasetA': '4784714419470860289',\n",
       "  'datasetB': '4785099261148659713',\n",
       "  'EuclideanDistance': 0.7498711551087044},\n",
       " {'datasetA': '4784714419470860289',\n",
       "  'datasetB': '4785099262549557249',\n",
       "  'EuclideanDistance': 0.5324951329725846},\n",
       " {'datasetA': '4784374250679042049',\n",
       "  'datasetB': '4785099261781999617',\n",
       "  'EuclideanDistance': 0.44716871023253907},\n",
       " {'datasetA': '4784714422046162945',\n",
       "  'datasetB': '4785099262209818625',\n",
       "  'EuclideanDistance': 0.4667737491139823},\n",
       " {'datasetA': '4784374250964254721',\n",
       "  'datasetB': '4785099261899440129',\n",
       "  'EuclideanDistance': 0.3165951037693452},\n",
       " {'datasetA': '4784374253128515585',\n",
       "  'datasetB': '4785099261500981249',\n",
       "  'EuclideanDistance': 0.7702060177246565},\n",
       " {'datasetA': '4784374257977131009',\n",
       "  'datasetB': '4785099262193041409',\n",
       "  'EuclideanDistance': 0.495846453348063},\n",
       " {'datasetA': '4784374253405339649',\n",
       "  'datasetB': '4785099261354180609',\n",
       "  'EuclideanDistance': 0.5450817116853461},\n",
       " {'datasetA': '4784374253405339649',\n",
       "  'datasetB': '4785099262369202177',\n",
       "  'EuclideanDistance': 0.5831855995865232},\n",
       " {'datasetA': '4784714421819670529',\n",
       "  'datasetB': '4785099262222401537',\n",
       "  'EuclideanDistance': 0.47785237369049804},\n",
       " {'datasetA': '4784714421819670529',\n",
       "  'datasetB': '4785099261249323009',\n",
       "  'EuclideanDistance': 0.3264496772692851},\n",
       " {'datasetA': '4784374257863884801',\n",
       "  'datasetB': '4785099261593255937',\n",
       "  'EuclideanDistance': 0.19182311006330038},\n",
       " {'datasetA': '4784374254898511873',\n",
       "  'datasetB': '4785099261706502145',\n",
       "  'EuclideanDistance': 0.6588970482973275},\n",
       " {'datasetA': '4784374254898511873',\n",
       "  'datasetB': '4785099261756833793',\n",
       "  'EuclideanDistance': 0.672912101597078},\n",
       " {'datasetA': '4784374257515757569',\n",
       "  'datasetB': '4785099262658609153',\n",
       "  'EuclideanDistance': 0.7932641486026878},\n",
       " {'datasetA': '4784374254965620737',\n",
       "  'datasetB': '4785099261714890753',\n",
       "  'EuclideanDistance': 0.6545084697833802},\n",
       " {'datasetA': '4784714419525386241',\n",
       "  'datasetB': '4785099262088183809',\n",
       "  'EuclideanDistance': 0.5317028517920181},\n",
       " {'datasetA': '4784374255640903681',\n",
       "  'datasetB': '4785099261261905921',\n",
       "  'EuclideanDistance': 0.4315088928725247},\n",
       " {'datasetA': '4784374254684602369',\n",
       "  'datasetB': '4785099261836525569',\n",
       "  'EuclideanDistance': 0.41489785701506493},\n",
       " {'datasetA': '4784714421681258497',\n",
       "  'datasetB': '4785099262096572417',\n",
       "  'EuclideanDistance': 0.7047607498004506},\n",
       " {'datasetA': '4784374254281949185',\n",
       "  'datasetB': '4785099262264344577',\n",
       "  'EuclideanDistance': 0.42304164862359855},\n",
       " {'datasetA': '4784714422239100929',\n",
       "  'datasetB': '4785099261500981249',\n",
       "  'EuclideanDistance': 0.34344104257401636},\n",
       " {'datasetA': '4784714422239100929',\n",
       "  'datasetB': '4785099261106716673',\n",
       "  'EuclideanDistance': 0.4017105194544553},\n",
       " {'datasetA': '4784374254550384641',\n",
       "  'datasetB': '4785099261974937601',\n",
       "  'EuclideanDistance': 0.4944395499705523},\n",
       " {'datasetA': '4784374254550384641',\n",
       "  'datasetB': '4785099262377590785',\n",
       "  'EuclideanDistance': 0.23367456299059158},\n",
       " {'datasetA': '4784374254550384641',\n",
       "  'datasetB': '4785099262130126849',\n",
       "  'EuclideanDistance': 0.6622322289823517},\n",
       " {'datasetA': '4784374253845741569',\n",
       "  'datasetB': '4785099262570528769',\n",
       "  'EuclideanDistance': 0.5504078472336968},\n",
       " {'datasetA': '4784374253845741569',\n",
       "  'datasetB': '4785099262696357889',\n",
       "  'EuclideanDistance': 0.38678104626142684},\n",
       " {'datasetA': '4784374255099838465',\n",
       "  'datasetB': '4785099261094133761',\n",
       "  'EuclideanDistance': 0.6286287461657187},\n",
       " {'datasetA': '4784714421983248385',\n",
       "  'datasetB': '4785099261878468609',\n",
       "  'EuclideanDistance': 1.3055699838337351},\n",
       " {'datasetA': '4784714421983248385',\n",
       "  'datasetB': '4785099261442260993',\n",
       "  'EuclideanDistance': 0.49959599775240177},\n",
       " {'datasetA': '4784374256668508161',\n",
       "  'datasetB': '4785099261249323009',\n",
       "  'EuclideanDistance': 0.3265568266011567},\n",
       " {'datasetA': '4784714419768655873',\n",
       "  'datasetB': '4785099262440505345',\n",
       "  'EuclideanDistance': 1.1039656175766652},\n",
       " {'datasetA': '4784714419667992577',\n",
       "  'datasetB': '4785099262167875585',\n",
       "  'EuclideanDistance': 0.4982090779619993},\n",
       " {'datasetA': '4784714419667992577',\n",
       "  'datasetB': '4785099261219962881',\n",
       "  'EuclideanDistance': 1.4551426832993504},\n",
       " {'datasetA': '4784714419667992577',\n",
       "  'datasetB': '4785099262104961025',\n",
       "  'EuclideanDistance': 0.5950055908827739},\n",
       " {'datasetA': '4784714419533774849',\n",
       "  'datasetB': '4785099262209818625',\n",
       "  'EuclideanDistance': 0.7484433628694466},\n",
       " {'datasetA': '4784374256152608769',\n",
       "  'datasetB': '4785099262104961025',\n",
       "  'EuclideanDistance': 0.789337315884865},\n",
       " {'datasetA': '4784374256379101185',\n",
       "  'datasetB': '4785099262419533825',\n",
       "  'EuclideanDistance': 0.8753382507721171},\n",
       " {'datasetA': '4784714421840642049',\n",
       "  'datasetB': '4785099262478254081',\n",
       "  'EuclideanDistance': 0.706208668998942},\n",
       " {'datasetA': '4784374257050189825',\n",
       "  'datasetB': '4785099262486642689',\n",
       "  'EuclideanDistance': 0.5448432869085986},\n",
       " {'datasetA': '4784714421962276865',\n",
       "  'datasetB': '4785099261463232513',\n",
       "  'EuclideanDistance': 0.9891318389545021},\n",
       " {'datasetA': '4784714421962276865',\n",
       "  'datasetB': '4785099261354180609',\n",
       "  'EuclideanDistance': 0.19787141327180294},\n",
       " {'datasetA': '4784374254357446657',\n",
       "  'datasetB': '4785099262633443329',\n",
       "  'EuclideanDistance': 0.9343983928235476},\n",
       " {'datasetA': '4784714422574645249',\n",
       "  'datasetB': '4785099261282877441',\n",
       "  'EuclideanDistance': 0.5854982264032728},\n",
       " {'datasetA': '4784714419235979265',\n",
       "  'datasetB': '4785099261853302785',\n",
       "  'EuclideanDistance': 0.3507030114539806},\n",
       " {'datasetA': '4784714421211496449',\n",
       "  'datasetB': '4785099262608277505',\n",
       "  'EuclideanDistance': 0.771203262578764},\n",
       " {'datasetA': '4784374257608032257',\n",
       "  'datasetB': '4785099261928800257',\n",
       "  'EuclideanDistance': 0.6447141086845463},\n",
       " {'datasetA': '4784374255821258753',\n",
       "  'datasetB': '4785099262478254081',\n",
       "  'EuclideanDistance': 0.5746990073362439},\n",
       " {'datasetA': '4784374255821258753',\n",
       "  'datasetB': '4785099261643587585',\n",
       "  'EuclideanDistance': 0.5996398256505425},\n",
       " {'datasetA': '4784374254877540353',\n",
       "  'datasetB': '4785099261681336321',\n",
       "  'EuclideanDistance': 0.25847589128195964},\n",
       " {'datasetA': '4784374253392756737',\n",
       "  'datasetB': '4785099261198991361',\n",
       "  'EuclideanDistance': 0.43246317972164794},\n",
       " {'datasetA': '4784374253392756737',\n",
       "  'datasetB': '4785099262549557249',\n",
       "  'EuclideanDistance': 0.596167311140349},\n",
       " {'datasetA': '4784374252931383297',\n",
       "  'datasetB': '4785099262130126849',\n",
       "  'EuclideanDistance': 0.5228099083556157},\n",
       " {'datasetA': '4784374252931383297',\n",
       "  'datasetB': '4785099261744250881',\n",
       "  'EuclideanDistance': 0.6380746259965435},\n",
       " {'datasetA': '4784374257645780993',\n",
       "  'datasetB': '4785099261781999617',\n",
       "  'EuclideanDistance': 0.3122355794741437},\n",
       " {'datasetA': '4784374254571356161',\n",
       "  'datasetB': '4785099261433872385',\n",
       "  'EuclideanDistance': 0.3179446473063328},\n",
       " {'datasetA': '4784374250951671809',\n",
       "  'datasetB': '4785099262201430017',\n",
       "  'EuclideanDistance': 0.49970588685355044},\n",
       " {'datasetA': '4784374253719912449',\n",
       "  'datasetB': '4785099261622616065',\n",
       "  'EuclideanDistance': 0.5635887110705523},\n",
       " {'datasetA': '4784374253719912449',\n",
       "  'datasetB': '4785099262318870529',\n",
       "  'EuclideanDistance': 0.9735332802942827},\n",
       " {'datasetA': '4784374253719912449',\n",
       "  'datasetB': '4785099261433872385',\n",
       "  'EuclideanDistance': 0.6823086710779299},\n",
       " {'datasetA': '4784714421240856577',\n",
       "  'datasetB': '4785099262369202177',\n",
       "  'EuclideanDistance': 0.4634844364594114},\n",
       " {'datasetA': '4784374256219717633',\n",
       "  'datasetB': '4785099262377590785',\n",
       "  'EuclideanDistance': 0.416993874644544},\n",
       " {'datasetA': '4784714419865124865',\n",
       "  'datasetB': '4785099261584867329',\n",
       "  'EuclideanDistance': 0.8409209191487363},\n",
       " {'datasetA': '4784374255653486593',\n",
       "  'datasetB': '4785099261958160385',\n",
       "  'EuclideanDistance': 0.356427639819471},\n",
       " {'datasetA': '4784714419479248897',\n",
       "  'datasetB': '4785099261282877441',\n",
       "  'EuclideanDistance': 1.8800640502734969},\n",
       " {'datasetA': '4784714419479248897',\n",
       "  'datasetB': '4785099262725718017',\n",
       "  'EuclideanDistance': 1.9644454763798849},\n",
       " {'datasetA': '4784374254474887169',\n",
       "  'datasetB': '4785099261249323009',\n",
       "  'EuclideanDistance': 0.5673172707023184},\n",
       " {'datasetA': '4784374254474887169',\n",
       "  'datasetB': '4785099261631004673',\n",
       "  'EuclideanDistance': 0.5181290255787019},\n",
       " {'datasetA': '4784374254474887169',\n",
       "  'datasetB': '4785099262201430017',\n",
       "  'EuclideanDistance': 0.4123475558634202},\n",
       " {'datasetA': '4784714422067134465',\n",
       "  'datasetB': '4785099261538729985',\n",
       "  'EuclideanDistance': 0.48978260822133285},\n",
       " {'datasetA': '4784714422356541441',\n",
       "  'datasetB': '4785099262272733185',\n",
       "  'EuclideanDistance': 0.7299841845269857},\n",
       " {'datasetA': '4784714422356541441',\n",
       "  'datasetB': '4785099261530341377',\n",
       "  'EuclideanDistance': 0.31064526422844707},\n",
       " {'datasetA': '4784714421719007233',\n",
       "  'datasetB': '4785099261341597697',\n",
       "  'EuclideanDistance': 0.2556623200064509},\n",
       " {'datasetA': '4784374252943966209',\n",
       "  'datasetB': '4785099262633443329',\n",
       "  'EuclideanDistance': 1.3081918061146176},\n",
       " {'datasetA': '4784374256043556865',\n",
       "  'datasetB': '4785099261798776833',\n",
       "  'EuclideanDistance': 0.4183190196893324},\n",
       " {'datasetA': '4784374254223228929',\n",
       "  'datasetB': '4785099261681336321',\n",
       "  'EuclideanDistance': 0.3708008587992241},\n",
       " {'datasetA': '4784374255808675841',\n",
       "  'datasetB': '4785099262104961025',\n",
       "  'EuclideanDistance': 0.641226066531971},\n",
       " {'datasetA': '4784374256907583489',\n",
       "  'datasetB': '4785099262486642689',\n",
       "  'EuclideanDistance': 0.4194309182634798},\n",
       " {'datasetA': '4784374256907583489',\n",
       "  'datasetB': '4785099262377590785',\n",
       "  'EuclideanDistance': 0.360290566743044},\n",
       " {'datasetA': '4784714421458960385',\n",
       "  'datasetB': '4785099261341597697',\n",
       "  'EuclideanDistance': 0.9324291825221892},\n",
       " {'datasetA': '4784374254026096641',\n",
       "  'datasetB': '4785099261488398337',\n",
       "  'EuclideanDistance': 0.9643783192155667},\n",
       " {'datasetA': '4784374256647536641',\n",
       "  'datasetB': '4785099261622616065',\n",
       "  'EuclideanDistance': 0.5913769843141256},\n",
       " {'datasetA': '4784714419613466625',\n",
       "  'datasetB': '4785099262641831937',\n",
       "  'EuclideanDistance': 0.415876505181055},\n",
       " {'datasetA': '4784714419059818497',\n",
       "  'datasetB': '4785099262507614209',\n",
       "  'EuclideanDistance': 0.3522710159195567},\n",
       " {'datasetA': '4784374254336475137',\n",
       "  'datasetB': '4785099261714890753',\n",
       "  'EuclideanDistance': 0.3229673908419034},\n",
       " {'datasetA': '4784374254336475137',\n",
       "  'datasetB': '4785099262146904065',\n",
       "  'EuclideanDistance': 0.46093661210968256},\n",
       " {'datasetA': '4784374256500736001',\n",
       "  'datasetB': '4785099262696357889',\n",
       "  'EuclideanDistance': 0.38180303949011035},\n",
       " {'datasetA': '4784374251295604737',\n",
       "  'datasetB': '4785099262281121793',\n",
       "  'EuclideanDistance': 1.5413966553483571},\n",
       " {'datasetA': '4784374258010685441',\n",
       "  'datasetB': '4785099262050435073',\n",
       "  'EuclideanDistance': 0.32088827264406267},\n",
       " {'datasetA': '4784374255754149889',\n",
       "  'datasetB': '4785099262725718017',\n",
       "  'EuclideanDistance': 0.6826021739522663},\n",
       " {'datasetA': '4784374254562967553',\n",
       "  'datasetB': '4785099262155292673',\n",
       "  'EuclideanDistance': 0.49300600124036825},\n",
       " {'datasetA': '4784714419143704577',\n",
       "  'datasetB': '4785099262138515457',\n",
       "  'EuclideanDistance': 0.619520074959226},\n",
       " {'datasetA': '4784374256186163201',\n",
       "  'datasetB': '4785099261480009729',\n",
       "  'EuclideanDistance': 1.4020613370674049},\n",
       " {'datasetA': '4784374258086182913',\n",
       "  'datasetB': '4785099262419533825',\n",
       "  'EuclideanDistance': 0.4618417067506795},\n",
       " {'datasetA': '4784714421781921793',\n",
       "  'datasetB': '4785099262578917377',\n",
       "  'EuclideanDistance': 0.49196498024373775},\n",
       " {'datasetA': '4784714421781921793',\n",
       "  'datasetB': '4785099261249323009',\n",
       "  'EuclideanDistance': 0.3883979762946335},\n",
       " {'datasetA': '4784374255884173313',\n",
       "  'datasetB': '4785099261836525569',\n",
       "  'EuclideanDistance': 0.36555923616850894},\n",
       " {'datasetA': '4784374255884173313',\n",
       "  'datasetB': '4785099261157048321',\n",
       "  'EuclideanDistance': 0.30596285858396205},\n",
       " {'datasetA': '4784374256999858177',\n",
       "  'datasetB': '4785099262058823681',\n",
       "  'EuclideanDistance': 0.36898182877592023},\n",
       " {'datasetA': '4784374254718156801',\n",
       "  'datasetB': '4785099261631004673',\n",
       "  'EuclideanDistance': 1.045716502091422},\n",
       " {'datasetA': '4784374253115932673',\n",
       "  'datasetB': '4785099261681336321',\n",
       "  'EuclideanDistance': 0.4205035285678912},\n",
       " {'datasetA': '4784714421282799617',\n",
       "  'datasetB': '4785099261115105281',\n",
       "  'EuclideanDistance': 0.5032488576265651},\n",
       " {'datasetA': '4784714421282799617',\n",
       "  'datasetB': '4785099262167875585',\n",
       "  'EuclideanDistance': 0.45382989751819397},\n",
       " {'datasetA': '4784374253870907393',\n",
       "  'datasetB': '4785099261530341377',\n",
       "  'EuclideanDistance': 0.2454575245032466},\n",
       " {'datasetA': '4784374257998102529',\n",
       "  'datasetB': '4785099262713135105',\n",
       "  'EuclideanDistance': 0.5794639786528194},\n",
       " {'datasetA': '4784374254734934017',\n",
       "  'datasetB': '4785099262033657857',\n",
       "  'EuclideanDistance': 1.2348968078107465},\n",
       " {'datasetA': '4784374254252589057',\n",
       "  'datasetB': '4785099261186408449',\n",
       "  'EuclideanDistance': 0.6309532373285657},\n",
       " {'datasetA': '4784374254999175169',\n",
       "  'datasetB': '4785099262264344577',\n",
       "  'EuclideanDistance': 0.5832230536677862},\n",
       " {'datasetA': '4784374256962109441',\n",
       "  'datasetB': '4785099262021074945',\n",
       "  'EuclideanDistance': 1.0593875583229078},\n",
       " {'datasetA': '4784374256962109441',\n",
       "  'datasetB': '4785099262461476865',\n",
       "  'EuclideanDistance': 0.7916076667697836},\n",
       " {'datasetA': '4784374254235811841',\n",
       "  'datasetB': '4785099262595694593',\n",
       "  'EuclideanDistance': 0.29888648656679495},\n",
       " {'datasetA': '4784374253157875713',\n",
       "  'datasetB': '4785099261123493889',\n",
       "  'EuclideanDistance': 0.9291495235440028},\n",
       " {'datasetA': '4784714419730907137',\n",
       "  'datasetB': '4785099261928800257',\n",
       "  'EuclideanDistance': 0.6693060509828193},\n",
       " {'datasetA': '4784714419730907137',\n",
       "  'datasetB': '4785099261261905921',\n",
       "  'EuclideanDistance': 0.4265591886610408},\n",
       " {'datasetA': '4784374251559845889',\n",
       "  'datasetB': '4785099261907828737',\n",
       "  'EuclideanDistance': 0.28531191326407884},\n",
       " {'datasetA': '4784374251559845889',\n",
       "  'datasetB': '4785099262478254081',\n",
       "  'EuclideanDistance': 0.41713863982184507},\n",
       " {'datasetA': '4784714421635121153',\n",
       "  'datasetB': '4785099262562140161',\n",
       "  'EuclideanDistance': 0.45690169840073963},\n",
       " {'datasetA': '4784714421635121153',\n",
       "  'datasetB': '4785099261815554049',\n",
       "  'EuclideanDistance': 0.45224875720779034},\n",
       " {'datasetA': '4784714422478176257',\n",
       "  'datasetB': '4785099261798776833',\n",
       "  'EuclideanDistance': 0.5090297046014017},\n",
       " {'datasetA': '4784714422478176257',\n",
       "  'datasetB': '4785099262167875585',\n",
       "  'EuclideanDistance': 0.9618902502280287},\n",
       " {'datasetA': '4784374254932066305',\n",
       "  'datasetB': '4785099262570528769',\n",
       "  'EuclideanDistance': 0.4899884226234564},\n",
       " {'datasetA': '4784374254932066305',\n",
       "  'datasetB': '4785099261341597697',\n",
       "  'EuclideanDistance': 0.8353097363061763},\n",
       " {'datasetA': '4784374256119054337',\n",
       "  'datasetB': '4785099261463232513',\n",
       "  'EuclideanDistance': 0.8499216393165497},\n",
       " {'datasetA': '4784374256119054337',\n",
       "  'datasetB': '4785099261312237569',\n",
       "  'EuclideanDistance': 0.5482009791817949},\n",
       " {'datasetA': '4784374256110665729',\n",
       "  'datasetB': '4785099262448893953',\n",
       "  'EuclideanDistance': 0.5290697307729086},\n",
       " {'datasetA': '4784374256110665729',\n",
       "  'datasetB': '4785099261052190721',\n",
       "  'EuclideanDistance': 0.6504117353641997},\n",
       " {'datasetA': '4784374256110665729',\n",
       "  'datasetB': '4785099262155292673',\n",
       "  'EuclideanDistance': 0.539102316615327},\n",
       " {'datasetA': '4784374254755905537',\n",
       "  'datasetB': '4785099261341597697',\n",
       "  'EuclideanDistance': 0.706943025568778},\n",
       " {'datasetA': '4784374251245273089',\n",
       "  'datasetB': '4785099262633443329',\n",
       "  'EuclideanDistance': 0.6700211184228992},\n",
       " {'datasetA': '4784374256563650561',\n",
       "  'datasetB': '4785099262625054721',\n",
       "  'EuclideanDistance': 1.38357859112688},\n",
       " {'datasetA': '4784374257620615169',\n",
       "  'datasetB': '4785099261714890753',\n",
       "  'EuclideanDistance': 0.4009685791362008},\n",
       " {'datasetA': '4784374255540240385',\n",
       "  'datasetB': '4785099261698113537',\n",
       "  'EuclideanDistance': 0.3854726380150193},\n",
       " {'datasetA': '4784374255540240385',\n",
       "  'datasetB': '4785099262281121793',\n",
       "  'EuclideanDistance': 0.9863661574506203},\n",
       " {'datasetA': '4784714421257633793',\n",
       "  'datasetB': '4785099261958160385',\n",
       "  'EuclideanDistance': 0.28329267684476295},\n",
       " {'datasetA': '4784714421257633793',\n",
       "  'datasetB': '4785099261643587585',\n",
       "  'EuclideanDistance': 0.8271004421570906},\n",
       " {'datasetA': '4784374255112421377',\n",
       "  'datasetB': '4785099262042046465',\n",
       "  'EuclideanDistance': 0.26100073766861226},\n",
       " {'datasetA': '4784374255569600513',\n",
       "  'datasetB': '4785099261966548993',\n",
       "  'EuclideanDistance': 0.4580968818012304},\n",
       " {'datasetA': '4784714419093372929',\n",
       "  'datasetB': '4785099261480009729',\n",
       "  'EuclideanDistance': 0.7798757998695703},\n",
       " {'datasetA': '4784374252990103553',\n",
       "  'datasetB': '4785099261094133761',\n",
       "  'EuclideanDistance': 0.7398812721272343},\n",
       " {'datasetA': '4784714419386974209',\n",
       "  'datasetB': '4785099262725718017',\n",
       "  'EuclideanDistance': 0.33371911765730966},\n",
       " {'datasetA': '4784714421429600257',\n",
       "  'datasetB': '4785099262079795201',\n",
       "  'EuclideanDistance': 0.3851580457818156},\n",
       " {'datasetA': '4784374252801359873',\n",
       "  'datasetB': '4785099262633443329',\n",
       "  'EuclideanDistance': 1.8305299470108112},\n",
       " {'datasetA': '4784374256609787905',\n",
       "  'datasetB': '4785099262440505345',\n",
       "  'EuclideanDistance': 0.9835908139081397},\n",
       " {'datasetA': '4784714419177259009',\n",
       "  'datasetB': '4785099261660364801',\n",
       "  'EuclideanDistance': 0.4994145751577622},\n",
       " {'datasetA': '4784714419177259009',\n",
       "  'datasetB': '4785099262385979393',\n",
       "  'EuclideanDistance': 0.6549871535963858},\n",
       " {'datasetA': '4784374255917727745',\n",
       "  'datasetB': '4785099262369202177',\n",
       "  'EuclideanDistance': 0.5632743141537548},\n",
       " {'datasetA': '4784374255917727745',\n",
       "  'datasetB': '4785099262671192065',\n",
       "  'EuclideanDistance': 1.0588577005760282},\n",
       " {'datasetA': '4784714419441500161',\n",
       "  'datasetB': '4785099261744250881',\n",
       "  'EuclideanDistance': 0.5268020593084349},\n",
       " {'datasetA': '4784714421542846465',\n",
       "  'datasetB': '4785099261798776833',\n",
       "  'EuclideanDistance': 0.3444942649049231},\n",
       " {'datasetA': '4784714422373318657',\n",
       "  'datasetB': '4785099261341597697',\n",
       "  'EuclideanDistance': 0.43843199422751894},\n",
       " {'datasetA': '4784374251664703489',\n",
       "  'datasetB': '4785099262079795201',\n",
       "  'EuclideanDistance': 1.519199426544395},\n",
       " {'datasetA': '4784374251664703489',\n",
       "  'datasetB': '4785099262377590785',\n",
       "  'EuclideanDistance': 1.8187748421338046},\n",
       " {'datasetA': '4784374255015952385',\n",
       "  'datasetB': '4785099261064773633',\n",
       "  'EuclideanDistance': 0.695782951293347},\n",
       " {'datasetA': '4784714422192963585',\n",
       "  'datasetB': '4785099262033657857',\n",
       "  'EuclideanDistance': 0.2563236755234322},\n",
       " {'datasetA': '4784714422192963585',\n",
       "  'datasetB': '4785099262641831937',\n",
       "  'EuclideanDistance': 0.23651264339631203},\n",
       " {'datasetA': '4784374255510880257',\n",
       "  'datasetB': '4785099262193041409',\n",
       "  'EuclideanDistance': 0.7113286499078769},\n",
       " {'datasetA': '4784374255510880257',\n",
       "  'datasetB': '4785099262176264193',\n",
       "  'EuclideanDistance': 0.396164965003249},\n",
       " {'datasetA': '4784374255510880257',\n",
       "  'datasetB': '4785099261219962881',\n",
       "  'EuclideanDistance': 1.63079029773997},\n",
       " {'datasetA': '4784374257788387329',\n",
       "  'datasetB': '4785099262302093313',\n",
       "  'EuclideanDistance': 0.7427028702833794},\n",
       " {'datasetA': '4784374257788387329',\n",
       "  'datasetB': '4785099262419533825',\n",
       "  'EuclideanDistance': 0.8107837785935397},\n",
       " {'datasetA': '4784714419324059649',\n",
       "  'datasetB': '4785099262281121793',\n",
       "  'EuclideanDistance': 1.4700968848354652},\n",
       " {'datasetA': '4784714419324059649',\n",
       "  'datasetB': '4785099261488398337',\n",
       "  'EuclideanDistance': 1.424760939708709},\n",
       " {'datasetA': '4784374254424555521',\n",
       "  'datasetB': '4785099261488398337',\n",
       "  'EuclideanDistance': 0.8539115108317837},\n",
       " {'datasetA': '4784374256987275265',\n",
       "  'datasetB': '4785099261891051521',\n",
       "  'EuclideanDistance': 0.7270700107033671},\n",
       " {'datasetA': '4784374256442015745',\n",
       "  'datasetB': '4785099262369202177',\n",
       "  'EuclideanDistance': 0.41990234406540655},\n",
       " {'datasetA': '4784374256442015745',\n",
       "  'datasetB': '4785099261073162241',\n",
       "  'EuclideanDistance': 0.5073568421962621},\n",
       " {'datasetA': '4784374254743322625',\n",
       "  'datasetB': '4785099261433872385',\n",
       "  'EuclideanDistance': 0.7129593669386258},\n",
       " {'datasetA': '4784374257322819585',\n",
       "  'datasetB': '4785099262369202177',\n",
       "  'EuclideanDistance': 0.40328039268843197},\n",
       " {'datasetA': '4784374257322819585',\n",
       "  'datasetB': '4785099261043802113',\n",
       "  'EuclideanDistance': 0.40916412371256256},\n",
       " {'datasetA': '4784374257322819585',\n",
       "  'datasetB': '4785099262469865473',\n",
       "  'EuclideanDistance': 0.4759598622704742},\n",
       " {'datasetA': '4784374254583939073',\n",
       "  'datasetB': '4785099262549557249',\n",
       "  'EuclideanDistance': 0.6156372958391801},\n",
       " {'datasetA': '4784374256781754369',\n",
       "  'datasetB': '4785099261807165441',\n",
       "  'EuclideanDistance': 0.6223592352664269},\n",
       " {'datasetA': '4784374256781754369',\n",
       "  'datasetB': '4785099261136076801',\n",
       "  'EuclideanDistance': 0.6137786012624008},\n",
       " {'datasetA': '4784714419563134977',\n",
       "  'datasetB': '4785099262201430017',\n",
       "  'EuclideanDistance': 0.6473893287084393},\n",
       " {'datasetA': '4784374255125004289',\n",
       "  'datasetB': '4785099261052190721',\n",
       "  'EuclideanDistance': 0.8565961196335211},\n",
       " {'datasetA': '4784374255125004289',\n",
       "  'datasetB': '4785099262696357889',\n",
       "  'EuclideanDistance': 0.8179185651129957},\n",
       " {'datasetA': '4784374255125004289',\n",
       "  'datasetB': '4785099262541168641',\n",
       "  'EuclideanDistance': 1.208019301825919},\n",
       " {'datasetA': '4784374257368956929',\n",
       "  'datasetB': '4785099261698113537',\n",
       "  'EuclideanDistance': 0.7347842261490837},\n",
       " {'datasetA': '4784374254109982721',\n",
       "  'datasetB': '4785099262033657857',\n",
       "  'EuclideanDistance': 0.5445950427854561},\n",
       " {'datasetA': '4784374253057212417',\n",
       "  'datasetB': '4785099261714890753',\n",
       "  'EuclideanDistance': 0.6180646098785607},\n",
       " {'datasetA': '4784374253224984577',\n",
       "  'datasetB': '4785099261282877441',\n",
       "  'EuclideanDistance': 0.3548489111569395},\n",
       " {'datasetA': '4784374258170068993',\n",
       "  'datasetB': '4785099262339842049',\n",
       "  'EuclideanDistance': 0.4297453799438982},\n",
       " {'datasetA': '4784374258170068993',\n",
       "  'datasetB': '4785099261714890753',\n",
       "  'EuclideanDistance': 0.36460039283481},\n",
       " {'datasetA': '4784374255024340993',\n",
       "  'datasetB': '4785099262608277505',\n",
       "  'EuclideanDistance': 0.5068631167322664},\n",
       " {'datasetA': '4784374251274633217',\n",
       "  'datasetB': '4785099262578917377',\n",
       "  'EuclideanDistance': 0.6248877398462598},\n",
       " {'datasetA': '4784374254533607425',\n",
       "  'datasetB': '4785099261538729985',\n",
       "  'EuclideanDistance': 0.3329722307706399},\n",
       " {'datasetA': '4784374257893244929',\n",
       "  'datasetB': '4785099261958160385',\n",
       "  'EuclideanDistance': 0.7391185958510207},\n",
       " {'datasetA': '4784374257893244929',\n",
       "  'datasetB': '4785099262293704705',\n",
       "  'EuclideanDistance': 0.9599748951628189},\n",
       " {'datasetA': '4784374257893244929',\n",
       "  'datasetB': '4785099262499225601',\n",
       "  'EuclideanDistance': 0.7544154657427506},\n",
       " {'datasetA': '4784374257943576577',\n",
       "  'datasetB': '4785099261899440129',\n",
       "  'EuclideanDistance': 0.4331367370283328},\n",
       " {'datasetA': '4784374257943576577',\n",
       "  'datasetB': '4785099262419533825',\n",
       "  'EuclideanDistance': 0.7722731487876081},\n",
       " {'datasetA': '4784374256735617025',\n",
       "  'datasetB': '4785099261916217345',\n",
       "  'EuclideanDistance': 0.49481492614886713},\n",
       " {'datasetA': '4784374256848863233',\n",
       "  'datasetB': '4785099261261905921',\n",
       "  'EuclideanDistance': 0.29363024791283165},\n",
       " {'datasetA': '4784714419051429889',\n",
       "  'datasetB': '4785099261672947713',\n",
       "  'EuclideanDistance': 0.5963116844958081},\n",
       " {'datasetA': '4784374254827208705',\n",
       "  'datasetB': '4785099261530341377',\n",
       "  'EuclideanDistance': 1.8766391094788066},\n",
       " {'datasetA': '4784374254827208705',\n",
       "  'datasetB': '4785099261442260993',\n",
       "  'EuclideanDistance': 1.7342367692564347},\n",
       " {'datasetA': '4784374257872273409',\n",
       "  'datasetB': '4785099261291266049',\n",
       "  'EuclideanDistance': 0.4374067081629002},\n",
       " {'datasetA': '4784374257872273409',\n",
       "  'datasetB': '4785099261500981249',\n",
       "  'EuclideanDistance': 0.5587719054265388},\n",
       " {'datasetA': '4784374255057895425',\n",
       "  'datasetB': '4785099261207379969',\n",
       "  'EuclideanDistance': 0.2935536574655871},\n",
       " {'datasetA': '4784374255057895425',\n",
       "  'datasetB': '4785099261744250881',\n",
       "  'EuclideanDistance': 0.6023385575489023},\n",
       " {'datasetA': '4784374255057895425',\n",
       "  'datasetB': '4785099261442260993',\n",
       "  'EuclideanDistance': 0.5632759454666123},\n",
       " {'datasetA': '4784374255036923905',\n",
       "  'datasetB': '4785099261031219201',\n",
       "  'EuclideanDistance': 0.28433420493746975},\n",
       " {'datasetA': '4784374253573111809',\n",
       "  'datasetB': '4785099261043802113',\n",
       "  'EuclideanDistance': 0.7378419664304828},\n",
       " {'datasetA': '4784374253573111809',\n",
       "  'datasetB': '4785099261941383169',\n",
       "  'EuclideanDistance': 0.5800982739540308},\n",
       " {'datasetA': '4784374253992542209',\n",
       "  'datasetB': '4785099262520197121',\n",
       "  'EuclideanDistance': 1.2658853272315262},\n",
       " {'datasetA': '4784374253044629505',\n",
       "  'datasetB': '4785099261219962881',\n",
       "  'EuclideanDistance': 1.5425824038320102},\n",
       " {'datasetA': '4784374253929627649',\n",
       "  'datasetB': '4785099261186408449',\n",
       "  'EuclideanDistance': 0.33429042775304835},\n",
       " {'datasetA': '4784374253522780161',\n",
       "  'datasetB': '4785099261052190721',\n",
       "  'EuclideanDistance': 0.4207573407909971},\n",
       " {'datasetA': '4784374253522780161',\n",
       "  'datasetB': '4785099262616666113',\n",
       "  'EuclideanDistance': 0.5306287218513426},\n",
       " {'datasetA': '4784374252872663041',\n",
       "  'datasetB': '4785099261291266049',\n",
       "  'EuclideanDistance': 0.3755218510099157},\n",
       " {'datasetA': '4784374252872663041',\n",
       "  'datasetB': '4785099262121738241',\n",
       "  'EuclideanDistance': 0.43895614500982694},\n",
       " {'datasetA': '4784374252872663041',\n",
       "  'datasetB': '4785099261672947713',\n",
       "  'EuclideanDistance': 0.5528922019745218},\n",
       " {'datasetA': '4784374254600716289',\n",
       "  'datasetB': '4785099261094133761',\n",
       "  'EuclideanDistance': 0.5534344069902405},\n",
       " {'datasetA': '4784374254600716289',\n",
       "  'datasetB': '4785099261853302785',\n",
       "  'EuclideanDistance': 0.7491605121322826},\n",
       " {'datasetA': '4784374254600716289',\n",
       "  'datasetB': '4785099262209818625',\n",
       "  'EuclideanDistance': 0.9659517445814803},\n",
       " {'datasetA': '4784714419433111553',\n",
       "  'datasetB': '4785099262222401537',\n",
       "  'EuclideanDistance': 0.8237984313012625},\n",
       " {'datasetA': '4784374257595449345',\n",
       "  'datasetB': '4785099262671192065',\n",
       "  'EuclideanDistance': 0.782610449374352},\n",
       " {'datasetA': '4784714422260072449',\n",
       "  'datasetB': '4785099262050435073',\n",
       "  'EuclideanDistance': 0.3804976738066038},\n",
       " {'datasetA': '4784374251513708545',\n",
       "  'datasetB': '4785099261593255937',\n",
       "  'EuclideanDistance': 1.5902211079575013},\n",
       " {'datasetA': '4784714422448816129',\n",
       "  'datasetB': '4785099261853302785',\n",
       "  'EuclideanDistance': 0.5754842450935922},\n",
       " {'datasetA': '4784374253094961153',\n",
       "  'datasetB': '4785099261299654657',\n",
       "  'EuclideanDistance': 1.0320786499898158},\n",
       " {'datasetA': '4784374253510197249',\n",
       "  'datasetB': '4785099261643587585',\n",
       "  'EuclideanDistance': 0.5143821532237096},\n",
       " {'datasetA': '4784374253510197249',\n",
       "  'datasetB': '4785099262725718017',\n",
       "  'EuclideanDistance': 0.688660430196713},\n",
       " {'datasetA': '4784374256534290433',\n",
       "  'datasetB': '4785099261844914177',\n",
       "  'EuclideanDistance': 0.2598927603533748},\n",
       " {'datasetA': '4784714421735784449',\n",
       "  'datasetB': '4785099261157048321',\n",
       "  'EuclideanDistance': 0.3715462717528709},\n",
       " {'datasetA': '4784714421735784449',\n",
       "  'datasetB': '4785099261815554049',\n",
       "  'EuclideanDistance': 0.42572016401677476},\n",
       " {'datasetA': '4784714419705741313',\n",
       "  'datasetB': '4785099261584867329',\n",
       "  'EuclideanDistance': 1.6500663006473493},\n",
       " {'datasetA': '4784374255733178369',\n",
       "  'datasetB': '4785099262369202177',\n",
       "  'EuclideanDistance': 0.3698167363378122},\n",
       " {'datasetA': '4784714422327181313',\n",
       "  'datasetB': '4785099261798776833',\n",
       "  'EuclideanDistance': 0.3484188906607267},\n",
       " {'datasetA': '4784374253417922561',\n",
       "  'datasetB': '4785099262176264193',\n",
       "  'EuclideanDistance': 0.6321929407551014},\n",
       " {'datasetA': '4784374253417922561',\n",
       "  'datasetB': '4785099261341597697',\n",
       "  'EuclideanDistance': 0.42567979698144204},\n",
       " {'datasetA': '4784714419634438145',\n",
       "  'datasetB': '4785099262440505345',\n",
       "  'EuclideanDistance': 1.2204137037718628},\n",
       " {'datasetA': '4784714419739295745',\n",
       "  'datasetB': '4785099261756833793',\n",
       "  'EuclideanDistance': 0.2416936484024392},\n",
       " {'datasetA': '4784374255066284033',\n",
       "  'datasetB': '4785099261249323009',\n",
       "  'EuclideanDistance': 0.3711400739256081},\n",
       " {'datasetA': '4784374254403584001',\n",
       "  'datasetB': '4785099262138515457',\n",
       "  'EuclideanDistance': 0.36452643154154457},\n",
       " {'datasetA': '4784374255846424577',\n",
       "  'datasetB': '4785099261282877441',\n",
       "  'EuclideanDistance': 0.48941105016191994},\n",
       " {'datasetA': '4784374253891878913',\n",
       "  'datasetB': '4785099262021074945',\n",
       "  'EuclideanDistance': 0.8279664621976076},\n",
       " {'datasetA': '4784374253891878913',\n",
       "  'datasetB': '4785099262167875585',\n",
       "  'EuclideanDistance': 0.6244188936707724},\n",
       " {'datasetA': '4784374253891878913',\n",
       "  'datasetB': '4785099262696357889',\n",
       "  'EuclideanDistance': 0.24406190467185845},\n",
       " {'datasetA': '4784714419340836865',\n",
       "  'datasetB': '4785099261299654657',\n",
       "  'EuclideanDistance': 0.4695073720732554},\n",
       " {'datasetA': '4784714421773533185',\n",
       "  'datasetB': '4785099262327259137',\n",
       "  'EuclideanDistance': 0.7631830258719438},\n",
       " {'datasetA': '4784714421773533185',\n",
       "  'datasetB': '4785099261073162241',\n",
       "  'EuclideanDistance': 0.4531418219908167},\n",
       " {'datasetA': '4784374255364079617',\n",
       "  'datasetB': '4785099262318870529',\n",
       "  'EuclideanDistance': 0.46642553216661053},\n",
       " {'datasetA': '4784714419424722945',\n",
       "  'datasetB': '4785099261425483777',\n",
       "  'EuclideanDistance': 0.5740425228552073},\n",
       " {'datasetA': '4784374251467571201',\n",
       "  'datasetB': '4785099262650220545',\n",
       "  'EuclideanDistance': 0.4134276812880518},\n",
       " {'datasetA': '4784374251467571201',\n",
       "  'datasetB': '4785099262696357889',\n",
       "  'EuclideanDistance': 0.4369368485369189},\n",
       " {'datasetA': '4784714421391851521',\n",
       "  'datasetB': '4785099261094133761',\n",
       "  'EuclideanDistance': 0.7285631282438967},\n",
       " {'datasetA': '4784714421391851521',\n",
       "  'datasetB': '4785099261085745153',\n",
       "  'EuclideanDistance': 0.3979231341519298},\n",
       " {'datasetA': '4784714421391851521',\n",
       "  'datasetB': '4785099261106716673',\n",
       "  'EuclideanDistance': 0.5814063769227293},\n",
       " {'datasetA': '4784714421375074305',\n",
       "  'datasetB': '4785099261790388225',\n",
       "  'EuclideanDistance': 0.7767424148390017},\n",
       " {'datasetA': '4784374253598277633',\n",
       "  'datasetB': '4785099261941383169',\n",
       "  'EuclideanDistance': 0.2635083183972119},\n",
       " {'datasetA': '4784374253598277633',\n",
       "  'datasetB': '4785099261790388225',\n",
       "  'EuclideanDistance': 0.45369827000477675},\n",
       " {'datasetA': '4784374253304676353',\n",
       "  'datasetB': '4785099262079795201',\n",
       "  'EuclideanDistance': 0.43639295207825846},\n",
       " {'datasetA': '4784374253304676353',\n",
       "  'datasetB': '4785099261878468609',\n",
       "  'EuclideanDistance': 1.4433790447707722},\n",
       " {'datasetA': '4784374254940454913',\n",
       "  'datasetB': '4785099261354180609',\n",
       "  'EuclideanDistance': 0.5559073364815844},\n",
       " {'datasetA': '4784374253078183937',\n",
       "  'datasetB': '4785099262658609153',\n",
       "  'EuclideanDistance': 0.7932583943482889},\n",
       " {'datasetA': '4784374253078183937',\n",
       "  'datasetB': '4785099261807165441',\n",
       "  'EuclideanDistance': 0.893332661913184},\n",
       " {'datasetA': '4784374255460548609',\n",
       "  'datasetB': '4785099261412900865',\n",
       "  'EuclideanDistance': 1.7144414411329332},\n",
       " {'datasetA': '4784374255460548609',\n",
       "  'datasetB': '4785099262264344577',\n",
       "  'EuclideanDistance': 0.5674166819888656},\n",
       " {'datasetA': '4784374250855202817',\n",
       "  'datasetB': '4785099261115105281',\n",
       "  'EuclideanDistance': 0.6546451003481758},\n",
       " {'datasetA': '4784374257536729089',\n",
       "  'datasetB': '4785099262155292673',\n",
       "  'EuclideanDistance': 0.9864712828015079},\n",
       " {'datasetA': '4784374256760782849',\n",
       "  'datasetB': '4785099261106716673',\n",
       "  'EuclideanDistance': 0.5330794045430504},\n",
       " {'datasetA': '4784374255343108097',\n",
       "  'datasetB': '4785099261643587585',\n",
       "  'EuclideanDistance': 0.7895388909346198},\n",
       " {'datasetA': '4784714421316354049',\n",
       "  'datasetB': '4785099262167875585',\n",
       "  'EuclideanDistance': 0.8740252141264494},\n",
       " {'datasetA': '4784714421316354049',\n",
       "  'datasetB': '4785099262138515457',\n",
       "  'EuclideanDistance': 0.6456542516546002},\n",
       " {'datasetA': '4784374256211329025',\n",
       "  'datasetB': '4785099261727473665',\n",
       "  'EuclideanDistance': 0.2371781504294234},\n",
       " {'datasetA': '4784714418950766593',\n",
       "  'datasetB': '4785099262318870529',\n",
       "  'EuclideanDistance': 0.7949416446775509},\n",
       " {'datasetA': '4784714421727395841',\n",
       "  'datasetB': '4785099262440505345',\n",
       "  'EuclideanDistance': 1.2867056869877005},\n",
       " {'datasetA': '4784714419554746369',\n",
       "  'datasetB': '4785099261094133761',\n",
       "  'EuclideanDistance': 1.1001340275700087},\n",
       " {'datasetA': '4784374254483275777',\n",
       "  'datasetB': '4785099262650220545',\n",
       "  'EuclideanDistance': 0.3598788884557338},\n",
       " {'datasetA': '4784374254483275777',\n",
       "  'datasetB': '4785099261836525569',\n",
       "  'EuclideanDistance': 0.34194306931195145},\n",
       " {'datasetA': '4784374254483275777',\n",
       "  'datasetB': '4785099262633443329',\n",
       "  'EuclideanDistance': 0.6882166663639908},\n",
       " {'datasetA': '4784374251454988289',\n",
       "  'datasetB': '4785099262687969281',\n",
       "  'EuclideanDistance': 0.44912224712540116},\n",
       " {'datasetA': '4784374251454988289',\n",
       "  'datasetB': '4785099262507614209',\n",
       "  'EuclideanDistance': 0.7058121961681706},\n",
       " {'datasetA': '4784374254260977665',\n",
       "  'datasetB': '4785099261593255937',\n",
       "  'EuclideanDistance': 0.40154917299279075},\n",
       " {'datasetA': '4784714422545285121',\n",
       "  'datasetB': '4785099261681336321',\n",
       "  'EuclideanDistance': 0.4707784243787123},\n",
       " {'datasetA': '4784714419454083073',\n",
       "  'datasetB': '4785099261412900865',\n",
       "  'EuclideanDistance': 1.6424416137764977},\n",
       " {'datasetA': '4784714419454083073',\n",
       "  'datasetB': '4785099262440505345',\n",
       "  'EuclideanDistance': 1.1307137488295567},\n",
       " {'datasetA': '4784374251484348417',\n",
       "  'datasetB': '4785099261219962881',\n",
       "  'EuclideanDistance': 1.8092243908784535},\n",
       " {'datasetA': '4784374258128125953',\n",
       "  'datasetB': '4785099261312237569',\n",
       "  'EuclideanDistance': 0.5088785723787066},\n",
       " {'datasetA': '4784374256249077761',\n",
       "  'datasetB': '4785099261463232513',\n",
       "  'EuclideanDistance': 1.2437154636094139},\n",
       " {'datasetA': '4784714422230712321',\n",
       "  'datasetB': '4785099262687969281',\n",
       "  'EuclideanDistance': 0.7117874591334286},\n",
       " {'datasetA': '4784714422230712321',\n",
       "  'datasetB': '4785099261085745153',\n",
       "  'EuclideanDistance': 0.9093028590183496},\n",
       " {'datasetA': '4784714422230712321',\n",
       "  'datasetB': '4785099262050435073',\n",
       "  'EuclideanDistance': 0.5806719769825364},\n",
       " {'datasetA': '4784374257800970241',\n",
       "  'datasetB': '4785099262302093313',\n",
       "  'EuclideanDistance': 0.3964346208307195},\n",
       " {'datasetA': '4784714421643509761',\n",
       "  'datasetB': '4785099261631004673',\n",
       "  'EuclideanDistance': 0.37419265803905505},\n",
       " {'datasetA': '4784714422075523073',\n",
       "  'datasetB': '4785099261698113537',\n",
       "  'EuclideanDistance': 0.7206044521079611},\n",
       " {'datasetA': '4784374253258539009',\n",
       "  'datasetB': '4785099261622616065',\n",
       "  'EuclideanDistance': 0.4082155563547189},\n",
       " {'datasetA': '4784374251371102209',\n",
       "  'datasetB': '4785099262549557249',\n",
       "  'EuclideanDistance': 0.5156939742387228},\n",
       " {'datasetA': '4784374254189674497',\n",
       "  'datasetB': '4785099262042046465',\n",
       "  'EuclideanDistance': 0.3394913717739442},\n",
       " {'datasetA': '4784374257410899969',\n",
       "  'datasetB': '4785099261312237569',\n",
       "  'EuclideanDistance': 1.0112527415190853},\n",
       " {'datasetA': '4784714419793821697',\n",
       "  'datasetB': '4785099262146904065',\n",
       "  'EuclideanDistance': 0.5453845208531855},\n",
       " {'datasetA': '4784374256131637249',\n",
       "  'datasetB': '4785099261899440129',\n",
       "  'EuclideanDistance': 1.6793949203203384},\n",
       " {'datasetA': '4784714421689647105',\n",
       "  'datasetB': '4785099262562140161',\n",
       "  'EuclideanDistance': 1.0333918533859694},\n",
       " {'datasetA': '4784374255561211905',\n",
       "  'datasetB': '4785099261660364801',\n",
       "  'EuclideanDistance': 0.47326209721076407},\n",
       " {'datasetA': '4784374255561211905',\n",
       "  'datasetB': '4785099262633443329',\n",
       "  'EuclideanDistance': 0.6912674628331272},\n",
       " {'datasetA': '4784374256194551809',\n",
       "  'datasetB': '4785099261219962881',\n",
       "  'EuclideanDistance': 1.9001130190377507},\n",
       " {'datasetA': '4784374255305359361',\n",
       "  'datasetB': '4785099262113349633',\n",
       "  'EuclideanDistance': 1.9967112796613717},\n",
       " {'datasetA': '4784374255305359361',\n",
       "  'datasetB': '4785099261442260993',\n",
       "  'EuclideanDistance': 0.49515001688047816},\n",
       " {'datasetA': '4784374257117298689',\n",
       "  'datasetB': '4785099262486642689',\n",
       "  'EuclideanDistance': 0.9026662248627102},\n",
       " {'datasetA': '4784374253543751681',\n",
       "  'datasetB': '4785099262713135105',\n",
       "  'EuclideanDistance': 0.48076073877927883},\n",
       " {'datasetA': '4784374253543751681',\n",
       "  'datasetB': '4785099262461476865',\n",
       "  'EuclideanDistance': 0.6301240696033967},\n",
       " {'datasetA': '4784374256723034113',\n",
       "  'datasetB': '4785099261312237569',\n",
       "  'EuclideanDistance': 1.9327256876313215},\n",
       " {'datasetA': '4784374253531168769',\n",
       "  'datasetB': '4785099261123493889',\n",
       "  'EuclideanDistance': 0.46052828774458243},\n",
       " {'datasetA': '4784374255007563777',\n",
       "  'datasetB': '4785099261836525569',\n",
       "  'EuclideanDistance': 0.6262716230044666},\n",
       " {'datasetA': '4784374255007563777',\n",
       "  'datasetB': '4785099261756833793',\n",
       "  'EuclideanDistance': 0.8690127606028099},\n",
       " {'datasetA': '4784374255007563777',\n",
       "  'datasetB': '4785099262104961025',\n",
       "  'EuclideanDistance': 1.027900852702092},\n",
       " {'datasetA': '4784374250972643329',\n",
       "  'datasetB': '4785099262155292673',\n",
       "  'EuclideanDistance': 0.5011158556575117},\n",
       " {'datasetA': '4784374250972643329',\n",
       "  'datasetB': '4785099261073162241',\n",
       "  'EuclideanDistance': 0.7312825888630384},\n",
       " {'datasetA': '4784714419114344449',\n",
       "  'datasetB': '4785099261798776833',\n",
       "  'EuclideanDistance': 0.2839738722184359},\n",
       " {'datasetA': '4784714422553673729',\n",
       "  'datasetB': '4785099262339842049',\n",
       "  'EuclideanDistance': 0.9879817460605296},\n",
       " {'datasetA': '4784374254042873857',\n",
       "  'datasetB': '4785099262339842049',\n",
       "  'EuclideanDistance': 0.7954653785212267},\n",
       " {'datasetA': '4784374251157192705',\n",
       "  'datasetB': '4785099261064773633',\n",
       "  'EuclideanDistance': 0.5347608379833755},\n",
       " {'datasetA': '4784374257566089217',\n",
       "  'datasetB': '4785099262050435073',\n",
       "  'EuclideanDistance': 0.3687543261206057},\n",
       " {'datasetA': '4784374254160314369',\n",
       "  'datasetB': '4785099262293704705',\n",
       "  'EuclideanDistance': 1.60047409116615},\n",
       " {'datasetA': '4784374256165191681',\n",
       "  'datasetB': '4785099262033657857',\n",
       "  'EuclideanDistance': 0.48502709740191696},\n",
       " {'datasetA': '4784374256165191681',\n",
       "  'datasetB': '4785099261341597697',\n",
       "  'EuclideanDistance': 0.7007391085235297},\n",
       " {'datasetA': '4784374256165191681',\n",
       "  'datasetB': '4785099262293704705',\n",
       "  'EuclideanDistance': 0.964656100306387},\n",
       " {'datasetA': '4784374250788093953',\n",
       "  'datasetB': '4785099261425483777',\n",
       "  'EuclideanDistance': 0.9100902044127227},\n",
       " {'datasetA': '4784714421580595201',\n",
       "  'datasetB': '4785099262209818625',\n",
       "  'EuclideanDistance': 0.5429423142519414},\n",
       " {'datasetA': '4784714419579912193',\n",
       "  'datasetB': '4785099262650220545',\n",
       "  'EuclideanDistance': 0.8420998878183704},\n",
       " {'datasetA': '4784714419579912193',\n",
       "  'datasetB': '4785099261958160385',\n",
       "  'EuclideanDistance': 0.45112338023570675},\n",
       " {'datasetA': '4784374254806237185',\n",
       "  'datasetB': '4785099261073162241',\n",
       "  'EuclideanDistance': 0.36942768945721804},\n",
       " {'datasetA': '4784714421182136321',\n",
       "  'datasetB': '4785099261094133761',\n",
       "  'EuclideanDistance': 0.5143207588890132},\n",
       " {'datasetA': '4784714419269533697',\n",
       "  'datasetB': '4785099262193041409',\n",
       "  'EuclideanDistance': 0.5941877962468896},\n",
       " {'datasetA': '4784374255687041025',\n",
       "  'datasetB': '4785099262012686337',\n",
       "  'EuclideanDistance': 0.8231128956776285},\n",
       " {'datasetA': '4784374255687041025',\n",
       "  'datasetB': '4785099261660364801',\n",
       "  'EuclideanDistance': 0.665398113150317},\n",
       " {'datasetA': '4784374256744005633',\n",
       "  'datasetB': '4785099261123493889',\n",
       "  'EuclideanDistance': 0.9592331166331364},\n",
       " {'datasetA': '4784374256744005633',\n",
       "  'datasetB': '4785099261073162241',\n",
       "  'EuclideanDistance': 0.5290793521022908},\n",
       " {'datasetA': '4784374253178847233',\n",
       "  'datasetB': '4785099262385979393',\n",
       "  'EuclideanDistance': 0.8677740427175127},\n",
       " {'datasetA': '4784374256827891713',\n",
       "  'datasetB': '4785099261605838849',\n",
       "  'EuclideanDistance': 0.4713980076775385},\n",
       " {'datasetA': '4784714421232467969',\n",
       "  'datasetB': '4785099261660364801',\n",
       "  'EuclideanDistance': 0.8932125293141484},\n",
       " {'datasetA': '4784374252897828865',\n",
       "  'datasetB': '4785099261698113537',\n",
       "  'EuclideanDistance': 0.692255912271616},\n",
       " {'datasetA': '4784374257075355649',\n",
       "  'datasetB': '4785099261744250881',\n",
       "  'EuclideanDistance': 0.43661291671705865},\n",
       " {'datasetA': '4784714421354102785',\n",
       "  'datasetB': '4785099262478254081',\n",
       "  'EuclideanDistance': 0.8897217088335321},\n",
       " {'datasetA': '4784714421354102785',\n",
       "  'datasetB': '4785099261123493889',\n",
       "  'EuclideanDistance': 0.9137208614779578},\n",
       " {'datasetA': '4784374250884562945',\n",
       "  'datasetB': '4785099261899440129',\n",
       "  'EuclideanDistance': 0.4331920680400545},\n",
       " {'datasetA': '4784374255078866945',\n",
       "  'datasetB': '4785099262486642689',\n",
       "  'EuclideanDistance': 0.2530867335412268},\n",
       " {'datasetA': '4784714421605761025',\n",
       "  'datasetB': '4785099261878468609',\n",
       "  'EuclideanDistance': 1.4582056231955915},\n",
       " {'datasetA': '4784714421442183169',\n",
       "  'datasetB': '4785099261530341377',\n",
       "  'EuclideanDistance': 0.30355985989566586},\n",
       " {'datasetA': '4784714421442183169',\n",
       "  'datasetB': '4785099261995909121',\n",
       "  'EuclideanDistance': 0.3272159128734273},\n",
       " {'datasetA': '4784714421559623681',\n",
       "  'datasetB': '4785099262713135105',\n",
       "  'EuclideanDistance': 0.2642287915676916},\n",
       " {'datasetA': '4784714421559623681',\n",
       "  'datasetB': '4785099262377590785',\n",
       "  'EuclideanDistance': 0.5300645411993088},\n",
       " {'datasetA': '4784374251312381953',\n",
       "  'datasetB': '4785099262318870529',\n",
       "  'EuclideanDistance': 1.4879218354884238},\n",
       " {'datasetA': '4784714421832253441',\n",
       "  'datasetB': '4785099262012686337',\n",
       "  'EuclideanDistance': 0.39216118948005607},\n",
       " {'datasetA': '4784374253837352961',\n",
       "  'datasetB': '4785099261219962881',\n",
       "  'EuclideanDistance': 1.6740892800017673},\n",
       " {'datasetA': '4784714421870002177',\n",
       "  'datasetB': '4785099261681336321',\n",
       "  'EuclideanDistance': 0.3624157284007377},\n",
       " {'datasetA': '4784374253610860545',\n",
       "  'datasetB': '4785099262419533825',\n",
       "  'EuclideanDistance': 0.8898465320505424},\n",
       " {'datasetA': '4784374253883490305',\n",
       "  'datasetB': '4785099261672947713',\n",
       "  'EuclideanDistance': 0.3914006281274564},\n",
       " {'datasetA': '4784374254781071361',\n",
       "  'datasetB': '4785099261698113537',\n",
       "  'EuclideanDistance': 0.7556172690442937},\n",
       " {'datasetA': '4784374254781071361',\n",
       "  'datasetB': '4785099262578917377',\n",
       "  'EuclideanDistance': 0.6253444385698236},\n",
       " {'datasetA': '4784374254781071361',\n",
       "  'datasetB': '4785099261844914177',\n",
       "  'EuclideanDistance': 0.5693317516961258},\n",
       " {'datasetA': '4784374251010392065',\n",
       "  'datasetB': '4785099261207379969',\n",
       "  'EuclideanDistance': 0.6126738973548459},\n",
       " {'datasetA': '4784714419378585601',\n",
       "  'datasetB': '4785099261115105281',\n",
       "  'EuclideanDistance': 0.48227529957173715},\n",
       " {'datasetA': '4784374256584622081',\n",
       "  'datasetB': '4785099262687969281',\n",
       "  'EuclideanDistance': 0.44143777850764127},\n",
       " {'datasetA': '4784374256584622081',\n",
       "  'datasetB': '4785099261282877441',\n",
       "  'EuclideanDistance': 0.36488449001807793},\n",
       " {'datasetA': '4784374254814625793',\n",
       "  'datasetB': '4785099262096572417',\n",
       "  'EuclideanDistance': 0.7101707019417346},\n",
       " {'datasetA': '4784374254814625793',\n",
       "  'datasetB': '4785099262679580673',\n",
       "  'EuclideanDistance': 0.9562346649147321},\n",
       " {'datasetA': '4784714418963349505',\n",
       "  'datasetB': '4785099261488398337',\n",
       "  'EuclideanDistance': 0.919673574332116},\n",
       " {'datasetA': '4784374253669580801',\n",
       "  'datasetB': '4785099261714890753',\n",
       "  'EuclideanDistance': 0.49593127927383857},\n",
       " {'datasetA': '4784374251081695233',\n",
       "  'datasetB': '4785099261115105281',\n",
       "  'EuclideanDistance': 1.1499143625753712},\n",
       " {'datasetA': '4784714419026264065',\n",
       "  'datasetB': '4785099262713135105',\n",
       "  'EuclideanDistance': 0.3524577834100959},\n",
       " {'datasetA': '4784374251182358529',\n",
       "  'datasetB': '4785099262528585729',\n",
       "  'EuclideanDistance': 0.5600702739036332},\n",
       " {'datasetA': '4784374257578672129',\n",
       "  'datasetB': '4785099261781999617',\n",
       "  'EuclideanDistance': 0.5333821794909884},\n",
       " {'datasetA': '4784374255158558721',\n",
       "  'datasetB': '4785099261354180609',\n",
       "  'EuclideanDistance': 0.8226439740448795},\n",
       " {'datasetA': '4784374255158558721',\n",
       "  'datasetB': '4785099262562140161',\n",
       "  'EuclideanDistance': 0.5190513028970897},\n",
       " {'datasetA': '4784374255158558721',\n",
       "  'datasetB': '4785099261643587585',\n",
       "  'EuclideanDistance': 0.6238035618398919},\n",
       " {'datasetA': '4784374254978203649',\n",
       "  'datasetB': '4785099262650220545',\n",
       "  'EuclideanDistance': 0.5551370518041352},\n",
       " {'datasetA': '4784374254978203649',\n",
       "  'datasetB': '4785099261593255937',\n",
       "  'EuclideanDistance': 0.824770206848882},\n",
       " {'datasetA': '4784374257461231617',\n",
       "  'datasetB': '4785099262079795201',\n",
       "  'EuclideanDistance': 0.36308225354968354},\n",
       " {'datasetA': '4784374256358129665',\n",
       "  'datasetB': '4785099262071406593',\n",
       "  'EuclideanDistance': 0.34515223751648694},\n",
       " {'datasetA': '4784374255439577089',\n",
       "  'datasetB': '4785099261958160385',\n",
       "  'EuclideanDistance': 0.31624828705270314},\n",
       " {'datasetA': '4784714418971738113',\n",
       "  'datasetB': '4785099261907828737',\n",
       "  'EuclideanDistance': 0.47622933396260475},\n",
       " {'datasetA': '4784714418971738113',\n",
       "  'datasetB': '4785099261488398337',\n",
       "  'EuclideanDistance': 0.9136546436454773},\n",
       " {'datasetA': '4784374253984153601',\n",
       "  'datasetB': '4785099261823942657',\n",
       "  'EuclideanDistance': 1.2712226400131335},\n",
       " {'datasetA': '4784374257025024001',\n",
       "  'datasetB': '4785099261563895809',\n",
       "  'EuclideanDistance': 1.5949126393025896},\n",
       " {'datasetA': '4784374256685285377',\n",
       "  'datasetB': '4785099262310481921',\n",
       "  'EuclideanDistance': 0.6317145272198813},\n",
       " {'datasetA': '4784374255502491649',\n",
       "  'datasetB': '4785099261878468609',\n",
       "  'EuclideanDistance': 1.5639924267672962},\n",
       " {'datasetA': '4784374255502491649',\n",
       "  'datasetB': '4785099261983326209',\n",
       "  'EuclideanDistance': 0.6244899245685104},\n",
       " {'datasetA': '4784374255489908737',\n",
       "  'datasetB': '4785099261404512257',\n",
       "  'EuclideanDistance': 0.5222308780630651},\n",
       " {'datasetA': '4784374255489908737',\n",
       "  'datasetB': '4785099262104961025',\n",
       "  'EuclideanDistance': 0.3547639769540702},\n",
       " {'datasetA': '4784714419235979265',\n",
       "  'datasetB': '4785099261576478721',\n",
       "  'EuclideanDistance': 0.6635202994047181},\n",
       " {'datasetA': '4784374253170458625',\n",
       "  'datasetB': '4785099262549557249',\n",
       "  'EuclideanDistance': 0.4593930646110918},\n",
       " {'datasetA': '4784374253245956097',\n",
       "  'datasetB': '4785099262138515457',\n",
       "  'EuclideanDistance': 0.7032228797856799},\n",
       " {'datasetA': '4784714422536896513',\n",
       "  'datasetB': '4785099262239178753',\n",
       "  'EuclideanDistance': 1.9059970793460115},\n",
       " {'datasetA': '4784374255980642305',\n",
       "  'datasetB': '4785099261576478721',\n",
       "  'EuclideanDistance': 0.7484887758550074},\n",
       " {'datasetA': '4784374251559845889',\n",
       "  'datasetB': '4785099261375152129',\n",
       "  'EuclideanDistance': 0.41438460352929685},\n",
       " {'datasetA': '4784374251559845889',\n",
       "  'datasetB': '4785099261366763521',\n",
       "  'EuclideanDistance': 0.755669525463029},\n",
       " {'datasetA': '4784714422478176257',\n",
       "  'datasetB': '4785099261375152129',\n",
       "  'EuclideanDistance': 0.34491893830608994},\n",
       " {'datasetA': '4784374252956549121',\n",
       "  'datasetB': '4785099261672947713',\n",
       "  'EuclideanDistance': 0.6885974429783268},\n",
       " {'datasetA': '4784374255284387841',\n",
       "  'datasetB': '4785099261270294529',\n",
       "  'EuclideanDistance': 1.6747033293912938},\n",
       " ...]"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Flatten every entry of similar_list into a plain dict holding the two ids\n",
    "# and the distance between them.\n",
    "new_similar_list = [\n",
    "    {\n",
    "        \"datasetA\": row.datasetA.id,\n",
    "        \"datasetB\": row.datasetB.id,\n",
    "        \"EuclideanDistance\": row.EuclideanDistance,\n",
    "    }\n",
    "    for row in similar_list\n",
    "]\n",
    "new_similar_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'4785099262440505345': {'recommend': ['4784374254525218817', '4784374257159241729', '4784374258212012033']}}\n",
      "{'4785099261291266049': {'recommend': ['4784714419479248897', '4784374254827208705', '4784714422297821185']}}\n",
      "{'4785099261299654657': {'recommend': ['4784374257524146177', '4784374256429432833', '4784374254827208705']}}\n",
      "{'4785099261974937601': {'recommend': ['4784374253292093441', '4784714419479248897', '4784374257381539841']}}\n",
      "{'4785099262096572417': {'recommend': ['4784374257524146177', '4784374253329842177', '4784374253292093441']}}\n",
      "{'4785099262549557249': {'recommend': ['4784374256429432833', '4784374254827208705', '4784714422297821185']}}\n",
      "{'4785099262419533825': {'recommend': ['4784374251513708545', '4784714422536896513', '4784374255666069505']}}\n",
      "{'4785099261354180609': {'recommend': ['4784374253292093441', '4784714419479248897', '4784374254827208705']}}\n",
      "{'4785099261186408449': {'recommend': ['4784374256429432833', '4784374256131637249', '4784374256723034113']}}\n",
      "{'4785099261798776833': {'recommend': ['4784714419479248897', '4784374254827208705', '4784714422297821185']}}\n",
      "{'4785099262725718017': {'recommend': ['4784714419479248897', '4784374254827208705', '4784374256492347393']}}\n",
      "{'4785099261878468609': {'recommend': ['4784714422297821185', '4784374258031656961', '4784714421953888257']}}\n",
      "{'4785099261052190721': {'recommend': ['4784714419479248897', '4784714422297821185', '4784374256723034113']}}\n",
      "{'4785099262616666113': {'recommend': ['4784374253292093441', '4784714419479248897', '4784374257524146177']}}\n",
      "{'4785099262209818625': {'recommend': ['4784374256131637249', '4784374251664703489', '4784374256492347393']}}\n",
      "{'4785099261148659713': {'recommend': ['4784374252801359873', '4784374254068039681', '4784374251295604737']}}\n",
      "{'4785099262281121793': {'recommend': ['4784714422297821185', '4784374254827208705', '4784374257205379073']}}\n",
      "{'4785099261698113537': {'recommend': ['4784374251295604737', '4784374253329842177', '4784714421324742657']}}\n",
      "{'4785099262327259137': {'recommend': ['4784374253329842177', '4784374252801359873', '4784374253917044737']}}\n",
      "{'4785099261433872385': {'recommend': ['4784374253292093441', '4784374254311309313', '4784374256525901825']}}\n"
     ]
    }
   ],
   "source": [
    "# Group the similarity pairs by their datasetB id; the datasetA ids in each\n",
    "# group are the recommendation candidates for that datasetB id.\n",
    "dict_new_similar_list = {}\n",
    "\n",
    "for pair in new_similar_list:\n",
    "    dict_new_similar_list.setdefault(pair.get(\"datasetB\"), []).append(pair)\n",
    "\n",
    "# Number of recommendations kept per datasetB id.\n",
    "TOP_N = 3\n",
    "\n",
    "recommend_list = []\n",
    "\n",
    "for key, value in dict_new_similar_list.items():\n",
    "    # A smaller Euclidean distance means a closer (more similar) candidate,\n",
    "    # so rank ascending; a descending sort would keep the least similar ones.\n",
    "    value.sort(key=lambda x: x['EuclideanDistance'])\n",
    "    # Slicing also covers groups with fewer than TOP_N candidates.\n",
    "    recommend_list.append({\n",
    "        f\"{key}\": {\"recommend\": [pair[\"datasetA\"] for pair in value[:TOP_N]]}\n",
    "    })\n",
    "\n",
    "# Preview at most 20 entries; slicing avoids an IndexError on shorter lists.\n",
    "for entry in recommend_list[:20]:\n",
    "    print(entry)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['4784374254525218817', '4784374257159241729', '4784374258212012033']"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the recommendations stored under the first entry's first key.\n",
    "first_entry = recommend_list[0]\n",
    "first_key = list(first_entry.keys())[0]\n",
    "first_entry[f\"{first_key}\"][\"recommend\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pymongo\n",
    "\n",
    "# Connect to the MongoDB server.\n",
    "client = pymongo.MongoClient(\"mongodb://mongodb:27017/\")\n",
    "\n",
    "# Pick the database, then the collection inside it.\n",
    "db = client.get_database(\"Cache\")\n",
    "collection = db.get_collection(\"cache\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the first document in the collection with this id, if one exists.\n",
    "matched_doc = collection.find_one({\"id\": '4784714423266705409'})\n",
    "if matched_doc is not None:\n",
    "    print(matched_doc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pymongo.results.UpdateResult at 0x7f43a0eac640>"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Store the first entry's recommendations on the document with this id,\n",
    "# under a field named after that entry's key.\n",
    "first_entry = recommend_list[0]\n",
    "field_name = f\"{list(first_entry.keys())[0]}\"\n",
    "collection.update_one(\n",
    "    {'id': '4784714423266705409'},\n",
    "    {\"$set\": {field_name: first_entry[field_name][\"recommend\"]}},\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For every entry, set the 'recommend' field on the document whose id equals\n",
    "# the entry's first key.\n",
    "for entry in recommend_list:\n",
    "    article_id = f'{list(entry.keys())[0]}'\n",
    "    collection.update_one(\n",
    "        {'id': article_id},\n",
    "        {'$set': {'recommend': entry[article_id][\"recommend\"]}},\n",
    "    )"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
